import json
from pathlib import Path, PosixPath


class AnnotationLocationLoader:
    """
    Loads bounding-box annotations for the images that exist on disk.

    The annotation JSON file is expected to hold a list of records, each with
    an 'External ID' (the image file name) and a 'Label' mapping annotation
    types to lists of polygons. Optionally, one JSON file per image with a
    mouth bounding box can be read from a separate folder.

    :param annotation_file: path of the JSON file holding the annotations
    :param images_base_folder: folder whose direct sub-folders contain the images
    :param mouth_annotations_folder: optional folder with one mouth annotation
        JSON file per image; None disables the mouth annotations
    """

    def __init__(self, annotation_file='input/caries_dataset_annotation.json',
                 images_base_folder=Path('input/test_data/'),
                 mouth_annotations_folder=None):
        self._annotation_file = annotation_file
        self._annotated_images = set()
        self._available_annotations = set()
        self._data = {}

        # Path() accepts strings as well as path objects of any flavour, so
        # no type check is needed before converting.
        self._img_path = Path(images_base_folder)

        # get the names of the images which are available as files
        self._available_images = self._get_names_from_available_images()
        self._load_annotations()

        # Always define the attribute so it can be read even when no mouth
        # annotations were requested.
        self._mouth_annotations_folder = None
        if mouth_annotations_folder is not None:
            self._mouth_annotations_folder = Path(mouth_annotations_folder)
            self._load_mouth_annotation_additon()

        # The sets were only needed for de-duplication while loading; expose
        # them as sorted lists so the getters return a deterministic order.
        self._annotated_images = sorted(self._annotated_images)
        self._available_annotations = sorted(self._available_annotations)

    def _get_names_from_available_images(self):
        """
        Finds the names of all pictures available in the image base folder.

        Only files located in direct sub-folders of the base folder are
        considered; hidden files (name starting with '.') are ignored.

        :return: list of file names (without any folder part)
        """
        names_from_available_images = []
        for folder in self._img_path.iterdir():
            if not folder.is_dir():
                continue
            names_from_available_images.extend(
                entry.name for entry in folder.iterdir()
                if entry.is_file() and not entry.name.startswith('.')
            )
        return names_from_available_images

    def _load_annotations(self):
        """
        Loads all annotations for the available images from the json file
        into the internal structure.

        Every polygon is reduced to its axis-aligned bounding box
        [(min x, min y), (max x, max y)] and stored together with the
        lower-cased annotation type.
        """
        # Build the lookup set once instead of scanning the list per record.
        available = set(self._available_images)

        with open(self._annotation_file, encoding='utf-8') as file:
            json_data = json.load(file)

        for picture in json_data:
            picture_filename = picture['External ID']
            if picture_filename not in available:
                continue

            boxes = self._data.setdefault(picture_filename, [])

            # Skip the 'Skip' entries in the file (their label is not a dict)
            label = picture['Label']
            if not isinstance(label, dict):
                continue

            for annotation_type, polygons in label.items():
                self._available_annotations.add(annotation_type)
                self._annotated_images.add(picture_filename)
                for box in polygons:
                    x_all = [point['x'] for point in box['geometry']]
                    y_all = [point['y'] for point in box['geometry']]
                    box_coord = [(min(x_all), min(y_all)),
                                 (max(x_all), max(y_all))]
                    boxes.append((annotation_type.lower(), box_coord))

    def _load_mouth_annotation_additon(self):
        """
        Loads the additional mouth annotations for all available images from
        the mouth annotation folder.

        This represents the local constraints for the advanced training.
        Each entry is stored as
        ('mouth', [(image width, image height), top left, bottom right]).
        Annotation files that refer to an image which is not available are
        skipped and counted; the count is reported once at the end.
        """
        available = set(self._available_images)
        annotation_files = [
            file for file in self._mouth_annotations_folder.iterdir()
            if file.is_file() and not file.name.startswith('.')
        ]

        counter_number_of_annotated_but_missing_files = 0
        for annotation_file in annotation_files:
            with open(annotation_file, encoding='utf-8') as file:
                mouth_annotation = json.load(file)

            asset = mouth_annotation['asset']
            picture_filename = asset['name']
            if picture_filename not in available:
                counter_number_of_annotated_but_missing_files += 1
                continue

            boxes = self._data.setdefault(picture_filename, [])
            self._annotated_images.add(picture_filename)

            # Only the first region of the file is used.
            bb = mouth_annotation['regions'][0]['boundingBox']
            top_left = (round(bb['left']), round(bb['top']))
            bottom_right = (round(bb['left'] + bb['width']),
                            round(bb['top'] + bb['height']))
            img_width = asset['size']['width']
            img_height = asset['size']['height']
            boxes.append(('mouth',
                          [(img_width, img_height), top_left, bottom_right]))

        if counter_number_of_annotated_but_missing_files > 0:
            print('[INFO] {} mouth annotations were skipped during loading. '
                  'This was done due to missing corresponding files in the '
                  'assigned folder.'.format(
                      counter_number_of_annotated_but_missing_files))

    def get_all_types_of_annotations(self):
        """
        :return: list of all the types of annotations which appeared at least
            once in the annotation_file (spelled as in the file)
        """
        return self._available_annotations

    def get_all_annotated_images(self):
        """
        :return: list of the names of all images which have at least one annotation
        """
        return self._annotated_images

    def get_all_available_images(self):
        """
        :return: list of the names of all image files found in the sub-folders
            of the image base folder, annotated or not
        """
        return self._available_images

    def is_annotated(self, image_name):
        """
        Checks whether an annotation exists for the given filename.

        :param image_name: complete name of the file including the filetype as a string
        :return: boolean whether there is an annotation for the image
        """
        return image_name in self._annotated_images

    def get_annotations(self, image_name, filter=None):
        """
        Returns a list of annotations for the given image_name
        e.g. [ ('caries', [(x1,y1), (x2,y2)]), _more_entries_ ]
        coords in form: [(top left), (bottom right)]

        Mouth entries have the form
        ('mouth', [(image width, image height), (top left), (bottom right)]).

        :param image_name: complete name of the file including the filetype
        :param filter: a list of strings representing the types of annotations
            the user wants to derive (compared case-insensitively). When it is
            omitted or empty, the types found in the annotation file are used,
            i.e. anything but the annotation for the mouth. If that list is
            empty too, every stored annotation is returned.
        :return: list of (type, coordinates) tuples; empty for images without
            annotations
        """
        if not self.is_annotated(image_name):
            return []

        if not filter:
            # return anything but the annotation for the mouth
            filter = self._available_annotations

        if filter:
            wanted = {category.lower() for category in filter}
            return [annotation for annotation in self._data[image_name]
                    if annotation[0] in wanted]

        # Hand out a copy so callers cannot modify the internal state.
        return list(self._data[image_name])