Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
K
keras-frcnn
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
sjjsmuel
keras-frcnn
Commits
da1191da
Commit
da1191da
authored
Mar 04, 2020
by
Jonas Müller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
prep cross validation with respect to time issue
Split based on hour + basic check for issue based on the timedeltas
parent
ab7eebc7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
91 additions
and
28 deletions
+91
-28
prep_cross_validation.py
prep_cross_validation.py
+91
-28
No files found.
prep_cross_validation.py
View file @
da1191da
import
copy
import
os
import
random
from
datetime
import
datetime
from
optparse
import
OptionParser
from
pathlib
import
Path
from
keras_frcnn.simple_parser
import
get_data
def split(a, n):
    """Lazily cut sequence ``a`` into ``n`` consecutive, near-equal slices.

    The slice lengths differ by at most one: the first ``len(a) % n`` slices
    each receive one extra element. When ``n`` exceeds ``len(a)`` the trailing
    slices are empty.

    Args:
        a: Sliceable sequence supporting ``len()``.
        n: Number of slices to produce.

    Returns:
        A generator yielding the ``n`` slices in order.

    Raises:
        ZeroDivisionError: Immediately (not on iteration) if ``n`` is 0.
    """
    base_len, remainder = divmod(len(a), n)

    def _start(part):
        # Each earlier part contributes base_len items, plus one extra item
        # for every part that falls within the remainder.
        return part * base_len + min(part, remainder)

    return (a[_start(part):_start(part + 1)] for part in range(n))
def get_date2(filename):
    """Return the first 13 characters of an image filename's timestamp part.

    For a name such as ``20190520_120309.jpg`` this yields ``20190520_1203``
    (the first 8 characters alone would be the date only).

    Args:
        filename: Bare image filename. If it starts with ``'IMG'`` the first
            four characters are dropped before the prefix is taken.

    Returns:
        The 13-character prefix of the (possibly stripped) filename; shorter
        if the name itself is shorter.
    """
    stem = filename[4:] if filename.startswith('IMG') else filename

    # This one file name is replaced by a fixed, hard-coded timestamp name
    # before the prefix is taken.
    if stem == '20190508-WA0026.jpg':
        stem = '20190508_123000.jpg'

    return stem[:13]
def get_date(filename):
    """Return the ``get_date2`` prefix of ``filename`` minus its last two characters.

    With a 13-character ``get_date2`` result such as ``20190520_1203`` this
    gives ``20190520_12``.

    Args:
        filename: Bare image filename, passed unchanged to ``get_date2``.

    Returns:
        The shortened prefix string.
    """
    minute_stamp = get_date2(filename)
    return minute_stamp[:-2]
#20190520_120309
def time_diff(date1, date2):
    """Return the non-negative time gap between two image filenames.

    Each filename is reduced to its prefix via ``get_date2`` and parsed with
    the format ``%Y%m%d_%H%M``, so the comparison has minute resolution.

    Args:
        date1: First image filename.
        date2: Second image filename.

    Returns:
        A tuple ``(duration_in_s, later, earlier)`` where ``duration_in_s`` is
        the gap in seconds as a float and ``later`` / ``earlier`` are the two
        parsed ``datetime`` objects, ordered so that ``later >= earlier``.

    Raises:
        ValueError: If either prefix does not match ``%Y%m%d_%H%M``.
    """
    stamp_format = '%Y%m%d_%H%M'
    d1 = datetime.strptime(get_date2(date1), stamp_format)
    d2 = datetime.strptime(get_date2(date2), stamp_format)

    # Order the pair so the subtraction below can never be negative.
    if d1 < d2:
        d1, d2 = d2, d1

    # timedelta.total_seconds() is the built-in way to get the gap in seconds.
    duration_in_s = (d1 - d2).total_seconds()
    return duration_in_s, d1, d2
#if duration_in_s < 600:
# print(duration_in_s)
# print()
parser
=
OptionParser
()
...
...
@@ -23,44 +50,80 @@ if not options.annotation_all: # if filename is not given
parser
.
error
(
'Error: path to annotation file data must be specified. Pass --path to command line'
)
all_data
,
classes_count
,
class_mapping
=
get_data
(
options
.
annotation_all
)
dates
=
{}
file_path
_dict
=
{}
index
_dict
=
{}
for
idx
,
entry
in
enumerate
(
all_data
):
file_path_dict
[
idx
]
=
(
Path
(
entry
[
'filepath'
]).
name
,
entry
[
'filepath'
])
print
(
file_path_dict
)
# list of indexes representing where each number represents one image
images
=
list
(
range
(
len
(
file_path_dict
.
keys
())))
# shuffle the data
random
.
shuffle
(
images
)
fold
=
list
(
split
(
images
,
options
.
num_folds
))
print
(
fold
)
filename
=
Path
(
entry
[
'filepath'
]).
name
date
=
get_date
(
filename
)
print
(
date
)
if
date
not
in
dates
.
keys
():
dates
[
date
]
=
[
idx
]
else
:
dates
[
date
].
append
(
idx
)
index_dict
[
idx
]
=
(
filename
,
entry
[
'filepath'
])
print
(
dates
)
# print(index_dict)
len_of_dates
=
{}
for
date
in
dates
.
keys
():
if
len
(
dates
[
date
])
not
in
len_of_dates
.
keys
():
len_of_dates
[
len
(
dates
[
date
])]
=
[
date
]
else
:
len_of_dates
[
len
(
dates
[
date
])].
append
(
date
)
print
(
len_of_dates
)
# prepare ordered list containing the appearing length in the dataset
keys_of_len_of_dates
=
list
(
len_of_dates
.
keys
())
keys_of_len_of_dates
.
sort
(
reverse
=
True
)
# Create empty lists for each fold as sublist of folds
folds
=
[]
for
i
in
range
(
options
.
num_folds
):
folds
.
append
([])
# Append always the next longest sublist to the next shortest fold
for
key
in
keys_of_len_of_dates
:
for
date
in
len_of_dates
[
key
]:
#find first min len fold
min_fold
=
0
for
idx
,
fold
in
enumerate
(
folds
):
if
len
(
fold
)
<
len
(
folds
[
min_fold
]):
min_fold
=
idx
folds
[
min_fold
].
extend
(
dates
[
date
])
#print(folds)
for
fold
in
folds
:
fold
.
sort
()
print
(
'len'
,
len
(
fold
))
print
(
fold
)
train
=
{}
test
=
{}
for
i
in
range
(
len
(
fold
)):
test
[
i
]
=
copy
.
deepcopy
(
fold
[
i
])
for
j
in
range
(
len
(
fold
)):
for
i
in
range
(
len
(
fold
s
)):
test
[
i
]
=
copy
.
deepcopy
(
fold
s
[
i
])
for
j
in
range
(
len
(
fold
s
)):
if
i
!=
j
:
if
i
not
in
train
.
keys
():
train
[
i
]
=
copy
.
deepcopy
(
fold
[
j
])
train
[
i
]
=
copy
.
deepcopy
(
fold
s
[
j
])
else
:
train
[
i
].
extend
(
fold
[
j
])
train
[
i
].
extend
(
folds
[
j
])
train
[
i
].
sort
()
test
[
i
].
sort
()
#visualize what was done:
'''
for i in range(len(fold)):
for
i
in
range
(
len
(
folds
)):
print
(
'train'
)
print
(
train
[
i
])
print
(
'test'
)
print
(
test
[
i
])
print
()
'''
for
i
in
range
(
len
(
fold
)):
# write to files
for
i
in
range
(
len
(
folds
)):
new_dir
=
output_path
+
'fold_'
+
str
(
i
)
if
not
os
.
path
.
exists
(
new_dir
):
os
.
makedirs
(
new_dir
)
...
...
@@ -69,12 +132,12 @@ for i in range(len(fold)):
lines
=
[]
for
filenumber
in
test
[
i
]:
for
box
in
all_data
[
filenumber
][
'bboxes'
]:
lines
.
append
(
file_path_dict
[
filenumber
][
0
]
+
','
+
str
(
box
[
'x1'
])
+
','
+
str
(
box
[
'y1'
])
+
','
+
str
(
box
[
'x2'
])
+
','
+
str
(
box
[
'y2'
])
+
','
+
box
[
'class'
]
+
'
\n
'
)
lines
.
append
(
index_dict
[
filenumber
][
0
]
+
','
+
str
(
box
[
'x1'
])
+
','
+
str
(
box
[
'y1'
])
+
','
+
str
(
box
[
'x2'
])
+
','
+
str
(
box
[
'y2'
])
+
','
+
box
[
'class'
]
+
'
\n
'
)
file
.
writelines
(
lines
)
with
open
(
new_dir
+
'/annotations_train.txt'
,
'w'
)
as
file
:
lines
=
[]
for
filenumber
in
train
[
i
]:
for
box
in
all_data
[
filenumber
][
'bboxes'
]:
lines
.
append
(
file_path_dict
[
filenumber
][
0
]
+
','
+
str
(
box
[
'x1'
])
+
','
+
str
(
box
[
'y1'
])
+
','
+
str
(
box
[
'x2'
])
+
','
+
str
(
box
[
'y2'
])
+
','
+
box
[
'class'
]
+
'
\n
'
)
lines
.
append
(
index_dict
[
filenumber
][
0
]
+
','
+
str
(
box
[
'x1'
])
+
','
+
str
(
box
[
'y1'
])
+
','
+
str
(
box
[
'x2'
])
+
','
+
str
(
box
[
'y2'
])
+
','
+
box
[
'class'
]
+
'
\n
'
)
file
.
writelines
(
lines
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment