Commit f5b3cfd1 authored by Jonas Müller's avatar Jonas Müller

Cross-validation dataset creation code

parent 19d548b2
......@@ -31,7 +31,9 @@ def time_diff(date1, date2):
# print()
num_folds = 5
min_time_diff = 3 * 60 # N minutes --> in seconds
min = 3
seconds_time_diff = min * 60 # N minutes --> in seconds
print('Minimum minutes time difference:', min)
output_path = 'data/cross_validate/'
# Create empty lists for each fold as sublist of folds
folds = []
......@@ -39,12 +41,14 @@ for i in range(num_folds):
folds.append([])
for i in range(num_folds):
filename_set = set()
new_dir = output_path + 'fold_' + str(i)
with open(new_dir+'/annotations_test.txt', 'r') as file:
lines = file.readlines()
for line in lines:
filename = line.split(',')[0]
folds[i].append(filename)
filename_set.add(filename)
folds[i] = list(filename_set)
for fold in folds:
fold.sort()
......@@ -65,7 +69,7 @@ for i in range(len(folds)):
for elem1 in folds[i]:
for elem2 in folds[j]:
diff, d1, d2 = time_diff(elem1, elem2)
if diff < min_time_diff:
if diff < seconds_time_diff:
print(str(diff / 60) + " min Zeitdifferenz")
print(str(d1) + " - " + str(d2))
print(elem1 + " - " + elem2)
......
......@@ -16,7 +16,7 @@ def get_date2(filename):
return filename[:13]
def get_date(filename):
return get_date2(filename)[:-2]
return get_date2(filename)[:-5]
#20190520_120309
......
......@@ -7,35 +7,6 @@ from pathlib import Path
from keras_frcnn.simple_parser import get_data
def get_date2(filename):
    """Return the 'YYYYMMDD_HHMM' timestamp prefix of an image filename.

    Drops the first four characters when the name starts with 'IMG'
    (i.e. an 'IMG_' camera prefix), and maps one known WhatsApp-named
    file to a hand-assigned timestamp before slicing the first 13
    characters.
    """
    name = filename
    if name.startswith('IMG'):
        name = name[4:]
    # This WhatsApp export carries no timestamp; substitute a fixed one.
    if name == '20190508-WA0026.jpg':
        name = '20190508_123000.jpg'
    return name[:13]
def get_date(filename):
    """Return only the 'YYYYMMDD' date part of the filename's timestamp.

    Strips the trailing '_HHMM' (5 characters) from the 13-character
    'YYYYMMDD_HHMM' string produced by get_date2().
    """
    timestamp = get_date2(filename)
    return timestamp[:-5]
#20190520_120309
def time_diff(date1, date2):
    """Return the absolute time difference between two image filenames.

    Each argument is a filename; its 'YYYYMMDD_HHMM' timestamp is
    extracted with get_date2() and parsed to a datetime.

    Returns a 3-tuple (seconds, later, earlier): the difference in
    seconds as a float, plus both datetimes ordered so later >= earlier.
    """
    later = datetime.strptime(get_date2(date1), '%Y%m%d_%H%M')
    earlier = datetime.strptime(get_date2(date2), '%Y%m%d_%H%M')
    # Order the pair instead of taking abs() so callers also get the
    # two parsed datetimes back, largest first.
    if later < earlier:
        later, earlier = earlier, later
    duration_in_s = (later - earlier).total_seconds()
    return duration_in_s, later, earlier
#if duration_in_s < 600:
# print(duration_in_s)
# print()
parser = OptionParser()
......@@ -50,50 +21,31 @@ if not options.annotation_all: # if filename is not given
parser.error('Error: path to annotation file data must be specified. Pass --path to command line')
all_data, classes_count, class_mapping = get_data(options.annotation_all)
dates = {}
index_dict = {}
for idx, entry in enumerate(all_data):
filename = Path(entry['filepath']).name
date = get_date(filename)
print(date)
if date not in dates.keys():
dates[date] = [idx]
else:
dates[date].append(idx)
index_dict[idx] = (filename, entry['filepath'])
print(dates)
# print(index_dict)
len_of_dates = {}
for date in dates.keys():
if len(dates[date]) not in len_of_dates.keys():
len_of_dates[len(dates[date])] = [date]
else:
len_of_dates[len(dates[date])].append(date)
print(len_of_dates)
# prepare ordered list containing the appearing length in the dataset
keys_of_len_of_dates = list(len_of_dates.keys())
keys_of_len_of_dates.sort(reverse=True)
index_dict[filename] = idx
# Create empty lists for each fold as sublist of folds
folds = []
for i in range(options.num_folds):
folds.append([])
# Append always the next longest sublist to the next shortest fold
for key in keys_of_len_of_dates:
for date in len_of_dates[key]:
#find first min len fold
min_fold = 0
for idx, fold in enumerate(folds):
if len(fold) < len(folds[min_fold]):
min_fold = idx
folds[min_fold].extend(dates[date])
#print(folds)
# load Folds
for i in range(options.num_folds):
filename_set = set()
new_dir = output_path + 'fold_' + str(i)
with open(new_dir+'/annotations_test.txt', 'r') as file:
lines = file.readlines()
for line in lines:
filename = line.split(',')[0]
filename_set.add(filename)
folds[i] = list(filename_set)
for fold in folds:
fold.sort()
......@@ -128,16 +80,10 @@ for i in range(len(folds)):
if not os.path.exists(new_dir):
os.makedirs(new_dir)
with open(new_dir+'/annotations_test.txt', 'w') as file:
lines = []
for filenumber in test[i]:
for box in all_data[filenumber]['bboxes']:
lines.append(index_dict[filenumber][0] + ',' + str(box['x1']) + ',' + str(box['y1']) + ',' + str(box['x2']) + ',' + str(box['y2']) + ',' + box['class'] + '\n')
file.writelines(lines)
with open(new_dir+'/annotations_train.txt', 'w') as file:
lines = []
for filenumber in train[i]:
for box in all_data[filenumber]['bboxes']:
lines.append(index_dict[filenumber][0] + ',' + str(box['x1']) + ',' + str(box['y1']) + ',' + str(box['x2']) + ',' + str(box['y2']) + ',' + box['class'] + '\n')
for filename in train[i]:
idx = index_dict[filename]
for box in all_data[idx]['bboxes']:
lines.append(filename + ',' + str(box['x1']) + ',' + str(box['y1']) + ',' + str(box['x2']) + ',' + str(box['y2']) + ',' + box['class'] + '\n')
file.writelines(lines)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment