diff --git a/.gitignore b/.gitignore index 8db6aabd..4533ea7d 100644 --- a/.gitignore +++ b/.gitignore @@ -114,4 +114,16 @@ weights/*.h5 models/*.h5 # csv files -*.csv +tag/*.csv + +# TF exported graph files +*.pb + +# VSCode +.vscode/*.json + +# Function extension /bin and /obj folders +Functions/pipeline/bin +Functions/pipeline/obj + +.DS_Store diff --git a/README.md b/README.md index f665d968..c6babc3a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Active learning + object detection Labeling images for object detection is a commonly required task to get started with a Computer Vision related project. -Good news that you do not have to label all images (draw bounding boxes) from scratch --- the goal of this project is to add (semi)automation to the process. +The good news is that you do not have to label all images (draw bounding boxes) from scratch --- the goal of this project is to add (semi)automation to the process. +Please refer to this blog post that describes Active Learning and the semi-automated flow: + [Active Learning for Object Detection in Partnership with Conservation Metrics](https://www.microsoft.com/developerblog/2018/11/06/active-learning-for-object-detection/) We will use Transfer Learning and Active Learning as the core Machine Learning components of the pipeline. -- Transfer Learning: use a powerful model pre-trained on a big dataset (COCO) as a starting point for fine-tuning on the needed classes. -- Active Learning: a human annotator labels a small set of images (set1) and trains an Object Detection Model (model1) on set1, then uses model1 to predict bounding boxes on more images (thus pre-labeling them). The human annotator reviews model1's predictions where the model was less confident -- and thus comes up with a new set of images -- set2. The next phase is to train a more powerful model2 on a bigger train set that includes set1 and set2, and to use model2's prediction results as a draft of labeled set3… @@ -28,6 +30,11 @@ There is config.ini that needs to be updated with details like blob storage conn More details TBD. Basically the idea is to kick off an Active Learning cycle with model retraining as soon as the human annotator reviews a new set of images. +# Notes before we get started +- The steps below refer to updating config.ini. You can find a detailed description of the config [here](config_description.md) +- Got several thousand images (or many more) and not sure if random sampling will be helpful to get rolling with labeling data? +Take a look at the [Guide to "initialization" predictions](init_pred_desription.md). + # How to run the semi-automated pipeline The flow below assumes the following: 1) We use the Tensorflow Object Detection API (Faster RCNN with Resnet 50 as the default option) to fine-tune object detection. diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..961f48b6 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,40 @@ +# Python package +# Create and test a Python package on multiple Python versions. 
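+# The 'Test' job below installs requirements.txt, runs pytest (with doctest modules) over the cli, functions, and utils packages on Python 3.6 and 3.7, and publishes the junit test results.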
+# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +jobs: + +- job: 'Test' + pool: + vmImage: 'Ubuntu 16.04' + strategy: + matrix: + #Azure Functions Python preview doesn't support 3.5 + #Python35: + # python.version: '3.5' + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + maxParallel: 4 + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install -r requirements.txt + displayName: 'Install dependencies' + + - script: | + pip install pytest + pytest cli functions utils --doctest-modules --junitxml=junit/test-results.xml + displayName: 'pytest' + + - task: PublishTestResults@2 + inputs: + testResultsFiles: '**/test-results.xml' + testRunTitle: 'Python $(python.version)' + condition: succeededOrFailed() diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 00000000..80c92091 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,77 @@ +## Management CLI + +A data management CLI to interact with the data manager endpoints. + +This CLI attempts to be as simple as possible to allow users to initialize a dataset, download a dataset, and upload a dataset to an Azure Storage blob. It presumes you have a functioning management endpoint and database. + +### Configuration + +Create an INI file, and store it anywhere. Copy the path to it, and add it to your environment variables as `ALCONFIG`. + +Example: `export ALCONFIG=/path/to/config.ini` + +The INI file should contain the following sections and keys to operate properly: + +``` +[FUNCTIONS] +FUNCTIONS_KEY= +FUNCTIONS_URL=https://mytagmanagement.azurewebsites.net/ + +[STORAGE] +STORAGE_ACCOUNT= +STORAGE_KEY= +STORAGE_TEMP_CONTAINER=temp-container +STORAGE_PERM_CONTAINER=perm-container + +[TAGGING] +TAGGING_LOCATION=~/taggingdata +TAGGING_USER=bhargav +TAGGING_IMAGE_DIR=${TAGGING:TAGGING_LOCATION}/AllImages +``` + +`FUNCTIONS_KEY` is the Azure Functions key that allows your CLI to authenticate with the management function. +`FUNCTIONS_URL` is the URL of the Function deployed to Azure. + +`STORAGE_ACCOUNT` is the name of the Azure Storage Account used to upload images. +`STORAGE_KEY` is the secret key of the Azure Storage Account. +`STORAGE_TEMP_CONTAINER` is the name of the temporary container where the CLI deposits your image files while onboarding; they are later copied to perm storage. +`STORAGE_PERM_CONTAINER` is the name of the container where the CLI deposits your image files after onboarding, and the model after training. + +`TAGGING_LOCATION` is the location on the user's machine where media will be downloaded. +`TAGGING_USER` is your username. +`TAGGING_IMAGE_DIR` is the location where all images will be downloaded for tagging and training, usually the /AllImages folder inside `TAGGING_LOCATION`. +`TAGGED_OUTPUT` is the location of the CSV file that will contain the human labelled data. + +### Commands + +#### Initialize a dataset + +##### Existing Dataset from the file system + +Usage: `python3 -m cli.cli onboard -f /path/to/images/` + +You can use this CLI invocation to upload images on your local file system to a temporary storage container, and onboard them into your dataset.
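+
+Only common image formats are onboarded, and hidden files are skipped (see `supported_file_type` in `cli/operations.py`). If you want to check ahead of time which local files would be picked up, here is a minimal sketch that mirrors that filter (the helper name `files_to_onboard` is only for illustration):
+
+```python
+import pathlib
+
+# Mirrors the suffix filter in cli/operations.py (supported_file_type).
+SUPPORTED_SUFFIXES = {'.png', '.jpg', '.jpeg', '.gif'}
+
+def files_to_onboard(folder):
+    """Yield the files under `folder` that the onboarding filter would accept."""
+    for path in pathlib.Path(folder).rglob('*'):
+        if path.is_file() and not path.name.startswith('.') and path.suffix.lower() in SUPPORTED_SUFFIXES:
+            yield path
+
+# Example (hypothetical path):
+# print(sum(1 for _ in files_to_onboard('/path/to/images')))
+```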
+ +##### Existing Dataset already stored in a blob storage container + +Usage: `python3 -m cli.cli onboard -a MyStorageAccount -c MyStorageContainer -k MyStorageAccountKey` + +You can use this CLI invocation to onboard images that are already in an Azure Storage container into your dataset. + +#### Download + +Usage: `python3 -m cli.cli download -n 50` + +Downloads 50 images to the location identified by `TAGGING_LOCATION` in your config. +There is an upper bound of 100 images that can be downloaded at present. + +Also generated is a VoTT json file containing any existing tags and labels. + +#### Upload tags + +Usage: `python3 -m cli.cli upload` + +Uploads the VoTT json file to be processed into the database. This will also delete the image directory +identified at `TAGGING_LOCATION`, so the next `download` cycle will commence without issue. diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cli/cli.py b/cli/cli.py new file mode 100644 index 00000000..a9e740ae --- /dev/null +++ b/cli/cli.py @@ -0,0 +1,53 @@ +import argparse +import os + +from utils.config import Config +from cli.operations import ( + download, + upload, + onboard_folder, + onboard_container, + LOWER_LIMIT, + UPPER_LIMIT +) + +if __name__ == "__main__": + # Example usage: + # python3 -m cli.cli download --num-images 40 + # python3 -m cli.cli upload + parser = argparse.ArgumentParser() + + parser.add_argument( + 'operation', + choices=['download', 'upload', 'onboard'] + ) + + parser.add_argument('-f', '--folder') + parser.add_argument('-a', '--storage-account') + parser.add_argument('-c', '--storage-container') + parser.add_argument('-k', '--storage-key') + parser.add_argument('-n', '--num-images', type=int) + + args = parser.parse_args() + operation = args.operation + config_path = os.environ.get('ALCONFIG') + + config = Config.read_config(config_path) + + if operation == 'download': + download(config, args.num_images) + elif operation == 'upload': + upload(config) + else: + if args.folder: + onboard_folder(config, args.folder) + elif args.storage_container and args.storage_account and args.storage_key: + onboard_container( + config, + args.storage_account, + args.storage_key, + args.storage_container + ) + else: + print("No folder, storage account, container, or key argument " + "passed - could not onboard.") diff --git a/cli/operations.py b/cli/operations.py new file mode 100644 index 00000000..6135ff3e --- /dev/null +++ b/cli/operations.py @@ -0,0 +1,251 @@ +import requests +import time +import shutil +import json +import copy +import pathlib +import os +from urlpath import URL +from azure.storage.blob import BlockBlobService, ContentSettings +from utils.blob_utils import BlobStorage +from utils.vott_parser import process_vott_json, create_starting_vott_json, build_id_to_VottImageTag, create_vott_json_from_image_labels +from functions.pipeline.shared.db_access import ImageLabel, ImageTag + +DEFAULT_NUM_IMAGES = 40 +LOWER_LIMIT = 0 +UPPER_LIMIT = 100 + +azure_storage_client = None + + +class ImageLimitException(Exception): + pass + + +def supported_file_type(file_name): + if file_name.startswith('.'): + return False + + file_suffix = pathlib.Path(file_name).suffix.lower() + if file_suffix in ['.png', '.jpg', '.jpeg', '.gif']: + return True + else: + return False + + +# Somewhat of a hack to remove the folder name passed on the +# command line from the file path to make blob names more sane. 
+# If one invokes the cli with the onboard folder option and passes +# along a path to a folder, every file path will contain the folder prefix. +# Example invocation: python3 -m cli onboard /my/full/path +# os.walk will return: /my/full/path/1.jpg, /my/full/path/2.jpg, etc. +def strip_path_prefix(folder_name, file_path): + folder_name_str = str(folder_name) + file_path_str = str(file_path) + + stripped_path = file_path_str.replace(folder_name_str, "") + + if stripped_path.startswith('/'): + return pathlib.Path(stripped_path[1:]) + + return pathlib.Path(stripped_path) + + +# TODO We should create the container if it does not exist +def onboard_folder(config, folder_name): + blob_storage = BlobStorage.get_azure_storage_client(config) + user_name = config.get("tagging_user") + onboarding_files = [] + + print("Walking file system") + + for (root, dirs, files) in os.walk(folder_name): + # no files at this level. + if len(files) == 0: + continue + + for file_name in files: + if not supported_file_type(file_name): + continue + + relative_path = os.path.join(root, file_name) + onboarding_files.append(relative_path) + + if len(onboarding_files) == 0: + print(f'Could not find any valid files to upload') + return + + for blob_path in onboarding_files: + stripped_path = strip_path_prefix(folder_name, blob_path) + print("Uploading " + str(blob_path)) + + blob_metadata = { + "userFilePath": blob_path, + "uploadUser": config.get("tagging_user") + } + + blob_storage.create_blob_from_path( + config.get("storage_temp_container"), + stripped_path, # the name of the file in blob storage. + pathlib.Path(blob_path), + content_settings=ContentSettings(content_type='image/png'), + metadata=blob_metadata + ) + + # Trigger queue based onboarding. + onboard_container( + config, + config.get('storage_account'), + config.get('storage_key'), + config.get('storage_temp_container') + ) + + +def onboard_container(config, account, key, container): + print("onboarding from storage container") + function_url = config.get('url') + '/api/onboardcontainer' + user_name = config.get("tagging_user") + + print("Onboarding storage container " + container + " into dataset") + + query = { + "userName": user_name + } + + data = { + "storageAccount": account, + "storageAccountKey": key, + "storageContainer": container + } + + resp = requests.post(function_url, params=query, json=data) + resp.raise_for_status() + + print("Set up container for onboarding. Onboarding may take some time.") + + +def _download_bounds(num_images): + images_to_download = num_images + + if num_images is None: + images_to_download = DEFAULT_NUM_IMAGES + + if images_to_download <= LOWER_LIMIT or images_to_download > UPPER_LIMIT: + raise ImageLimitException() + + return images_to_download + + +def download(config, num_images, strategy=None): + # TODO: better/more proper URI handling. 
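+    # Summary of the steps below: check out up to `num_images` images for this user
+    # from the images API, rebuild the local tagging directory, write out the VoTT
+    # json generated from the returned labels, then download the image files.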
+ functions_url = config.get("url") + "/api/images" + user_name = config.get("tagging_user") + images_to_download = _download_bounds(num_images) + query = { + "imageCount": images_to_download, + "userName": user_name, + "checkOut": "true" + } + + response = requests.get(functions_url, params=query) + response.raise_for_status() + + json_resp = response.json() + images_json = json.loads(json_resp["images"]) + count = len(images_json) + + print("Received " + str(count) + " files.") + + if count == 0: + print("No images could be retrieved with the current retrieval strategy!") + return + + file_tree = pathlib.Path(os.path.expanduser( + config.get("tagging_location")) + ) + + if file_tree.exists(): + print("Removing existing tag data directory: " + str(file_tree)) + + shutil.rmtree(str(file_tree), ignore_errors=True) + + data_dir = pathlib.Path(file_tree / "data") + data_dir.mkdir( + parents=True, + exist_ok=True + ) + checkedout_image_labels = [ImageLabel.fromJson(item) for item in images_json] + vott_json, image_urls = create_vott_json_from_image_labels(checkedout_image_labels, json_resp["classification_list"]) + + json_data = {'vott_json': vott_json, + 'imageUrls': image_urls} + + local_images = download_images(config, data_dir, json_data) + count = len(local_images) + print("Successfully downloaded " + str(count) + " images.") + for image_path in local_images: + print(image_path) + print("Ready to tag!") + + +def download_images(config, image_dir, json_resp): + print("Downloading files to " + str(image_dir)) + + # Write generated VoTT data from the function to a file. + write_vott_data(image_dir, json_resp) + + urls = json_resp['imageUrls'] + downloaded_file_paths = [] + for url_path in urls: + url = URL(url_path) + + # TODO: We will download an empty file if we get a permission error on the blob store URL + # We should raise an exception. 
For now the blob store must be publically accessible + response = requests.get(url) + file_path = pathlib.Path(image_dir / url.name) + + with open(str(file_path), "wb") as file: + for chunk in response.iter_content(chunk_size=128): + file.write(chunk) + file.close() + downloaded_file_paths.append(file_path) + return downloaded_file_paths + + +def write_vott_data(image_dir, json_resp): + # VOTT expects json file at same level as directory + data_file = pathlib.Path(image_dir / "../data.json") + vott_data = json_resp.get("vott_json", None) + + if not vott_data: + return + + with open(str(data_file), "w") as file: + vott_json_string = json.dumps(vott_data) + file.writelines(vott_json_string) + file.close() + + +def upload(config): + functions_url = config.get("url") + "/api/labels" + user_name = config.get("tagging_user") + tagging_location = pathlib.Path( + os.path.expanduser(config.get("tagging_location")) + ) + + print("Uploading VOTT json file...") + vott_json = pathlib.Path(tagging_location / "data.json") + + with open(str(vott_json)) as json_file: + json_data = json.load(json_file) + process_json = process_vott_json(json_data) + query = { + "userName": user_name, + "upload": "true" + } + + response = requests.post(functions_url, json=process_json, params=query) + response.raise_for_status() + + resp_json = response.json() + print("Done!") \ No newline at end of file diff --git a/cli/src/cli.py b/cli/src/cli.py deleted file mode 100644 index e1b56cc4..00000000 --- a/cli/src/cli.py +++ /dev/null @@ -1,41 +0,0 @@ -import argparse - -from operations import ( - init, - download, - upload, - abandon, - LOWER_LIMIT, - UPPER_LIMIT -) - -if __name__ == "__main__": - - # how i want to use the tool: - # python3 cli.py init --config /path/to/config.ini - # python3 cli.py download --num-images 40 - # python3 cli.py upload - # python3 cli.py abandon - parser = argparse.ArgumentParser() - - parser.add_argument( - 'operation', - choices=['init', 'download', 'upload', 'abandon'] - ) - - parser.add_argument('-n', '--num-images', type=int) - - parser.add_argument('-c', '--config') - - args = parser.parse_args() - - operation = args.operation - - if operation == 'init': - init(args.config) - elif operation == 'download': - download(args.num_images) - elif operation == 'upload': - upload() - else: - abandon() diff --git a/cli/src/operations.py b/cli/src/operations.py deleted file mode 100644 index f3f22f5b..00000000 --- a/cli/src/operations.py +++ /dev/null @@ -1,38 +0,0 @@ -DEFAULT_NUM_IMAGES = 40 -LOWER_LIMIT = 0 -UPPER_LIMIT = 100 - - -class MissingConfigException(Exception): - pass - - -class ImageLimitException(Exception): - pass - - -def init(config): - if (config is None): - raise MissingConfigException() - - raise NotImplementedError - - -def download(num_images): - images_to_download = num_images - - if num_images is None: - images_to_download = DEFAULT_NUM_IMAGES - - if images_to_download <= LOWER_LIMIT or images_to_download > UPPER_LIMIT: - raise ImageLimitException() - - return images_to_download - - -def upload(): - raise NotImplementedError() - - -def abandon(): - raise NotImplementedError() diff --git a/cli/src/test_operations.py b/cli/src/test_operations.py deleted file mode 100644 index e5564c31..00000000 --- a/cli/src/test_operations.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest -from operations import ( - init, - download, - upload, - abandon, - MissingConfigException, - ImageLimitException, - DEFAULT_NUM_IMAGES, - LOWER_LIMIT, - UPPER_LIMIT -) - - -class 
TestCLIOperations(unittest.TestCase): - def test_init(self): - with self.assertRaises(NotImplementedError): - init("fakeconfig") - - def test_init_missing_config(self): - with self.assertRaises(MissingConfigException): - init(None) - - def test_download_under_limit(self): - with self.assertRaises(ImageLimitException): - download(LOWER_LIMIT) - - def test_download_over_limit(self): - with self.assertRaises(ImageLimitException): - download(UPPER_LIMIT + 1) - - def test_download_missing_image_count(self): - downloaded_image_count = download(None) - self.assertEqual(DEFAULT_NUM_IMAGES, downloaded_image_count) - - def test_download_with_image_count(self): - downloaded_image_count = download(10) - self.assertEqual(10, downloaded_image_count) - - def test_upload(self): - with self.assertRaises(NotImplementedError): - upload() - - def test_abandon(self): - with self.assertRaises(NotImplementedError): - abandon() - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/cli/test_operations.py b/cli/test_operations.py new file mode 100644 index 00000000..12979c81 --- /dev/null +++ b/cli/test_operations.py @@ -0,0 +1,118 @@ +import unittest +import json +import pathlib +from unittest.mock import Mock + +from utils.config import Config, MissingConfigException +from utils.config import ( + FUNCTIONS_SECTION, + FUNCTIONS_KEY, + FUNCTIONS_URL, + STORAGE_SECTION, + STORAGE_KEY, + STORAGE_ACCOUNT, + STORAGE_TEMP_CONTAINER, + STORAGE_PERM_CONTAINER, + TAGGING_SECTION, + TAGGING_LOCATION_KEY, + TAGGING_USER_KEY +) +from .operations import ( + _download_bounds, + upload, + ImageLimitException, + DEFAULT_NUM_IMAGES, + LOWER_LIMIT, + UPPER_LIMIT +) + + +class TestCLIOperations(unittest.TestCase): + + def test_download_bounds_under_limit(self): + with self.assertRaises(ImageLimitException): + _download_bounds(LOWER_LIMIT) + + def test_download_bounds_over_limit(self): + with self.assertRaises(ImageLimitException): + _download_bounds(UPPER_LIMIT + 1) + + def test_download_bounds_missing_image_count(self): + downloaded_image_count = _download_bounds(None) + self.assertEqual(DEFAULT_NUM_IMAGES, downloaded_image_count) + + def test_download_bounds_with_image_count(self): + downloaded_image_count = _download_bounds(10) + self.assertEqual(10, downloaded_image_count) + + +class TestConfig(unittest.TestCase): + + def _mock_sections(self, sections, data): + def sections_function(): + return sections + + def data_function(self, name): + return data.get(name, None) + + test = Mock() + test.sections = sections_function + test.__getitem__ = data_function + + return test + + def test_missing_storage_section(self): + with self.assertRaises(MissingConfigException): + Config.read_config_with_parsed_config( + self._mock_sections([FUNCTIONS_SECTION], {}) + ) + + def test_missing_functions_section(self): + with self.assertRaises(MissingConfigException): + Config.read_config_with_parsed_config( + self._mock_sections([STORAGE_SECTION], {}) + ) + + def test_missing_tagging_section(self): + with self.assertRaises(MissingConfigException): + Config.read_config_with_parsed_config( + self._mock_sections([FUNCTIONS_SECTION, STORAGE_SECTION], {}) + ) + + def test_missing_functions_config_values(self): + with self.assertRaises(MissingConfigException): + Config.functions_config_section({}) + + def test_missing_storage_config_values(self): + with self.assertRaises(MissingConfigException): + Config.storage_config_section({}) + + def test_missing_tagging_config_values(self): + with 
self.assertRaises(MissingConfigException): + Config.tagging_config_section({}) + + def test_acceptable_config(self): + mock_data = self._mock_sections( + [STORAGE_SECTION, FUNCTIONS_SECTION, TAGGING_SECTION], + { + STORAGE_SECTION: { + STORAGE_KEY: "test", + STORAGE_ACCOUNT: "test", + STORAGE_TEMP_CONTAINER: "test", + STORAGE_PERM_CONTAINER: "test", + }, + FUNCTIONS_SECTION: { + FUNCTIONS_KEY: "test", + FUNCTIONS_URL: "test" + }, + TAGGING_SECTION: { + TAGGING_LOCATION_KEY: "test", + TAGGING_USER_KEY: "test" + } + } + ) + + Config.read_config_with_parsed_config(mock_data) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/config.ini b/config.ini index 4c6a0ef0..e507e099 100644 --- a/config.ini +++ b/config.ini @@ -5,7 +5,13 @@ image_container_name=activelearningimages label_container_name=activelearninglabels # IMAGE INFORMATION user_folders=True -classes=knots,date +classes=knots,defect +# Provide the preferred class distribution for the images presented for review. +# The last value corresponds to images where no objects were detected. +# In the example below: 60% of the images that the user will be reviewing have at least one bbox with object class1 (knot), +# 30% have bboxes for class2 (defect), +# and 10% get class "NULL" -- where neither knots nor defects were detected by the model. +ideal_class_balance=0.6,0.3,0.1 filetype=*.png # TAGGING MACHINE tagging_location=C:\Users\t-yapand\Desktop\NewTag @@ -33,6 +39,8 @@ min_confidence=.5 test_percentage=.2 model_name=faster_rcnn_resnet50_coco_2018_01_28 optional_pipeline_url=https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/samples/configs/faster_rcnn_resnet50_pets.config +# Init Predictions +init_model_name=faster_rcnn_resnet101_coco_2018_01_28 # Config File Details old_label_path=PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt old_train_path=PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010 @@ -59,3 +67,7 @@ tf_val_record=${tf_record_location%.*}_val.${tf_record_location##*.} tf_url=http://download.tensorflow.org/models/object_detection/${model_name}.tar.gz pipeline_file=${download_location}/${model_name}/pipeline.config fine_tune_checkpoint=${download_location}/${model_name}/model.ckpt +tagging_output=${data_dir}/tagging.csv +init_pred_tf_url=http://download.tensorflow.org/models/object_detection/${init_model_name}.tar.gz +init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb + diff --git a/config_description.md b/config_description.md index 4112da73..030c4fd4 100644 --- a/config_description.md +++ b/config_description.md @@ -16,6 +16,15 @@ Certain information about the images is required before the tagging and training This determines whether or not the images in blob storage are within separate folders (e.g. by date or by weather condition). Set to True if they are, False if not. - classes: This is a comma separated list of all classes that are being tagged. Please ensure that there are no spaces in the list and only commas are used to separate names. +- ideal_class_balance: +This is a comma separated list of the requested class distribution in the images being reviewed by the human expert. +Example (for a 2-class scenario): +`ideal_class_balance=0.6,0.3,0.1` +In this example: + 60% of the images that the user will be reviewing will have at least one bbox with object class1, + 30% will have bboxes for class2, + 10% will get class "NULL" -- where no objects were detected by the model. + - filetype: This is the type of image file used. 
The format is a glob pattern, so *.jpg for a .jpg file or *.png for a .png file. Note that only JPEG or PNG filetypes can be used with tensorflow. ## Tagging Machine diff --git a/dashboard/.gitignore b/dashboard/.gitignore new file mode 100644 index 00000000..58461f25 --- /dev/null +++ b/dashboard/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints \ No newline at end of file diff --git a/dashboard/config.json b/dashboard/config.json new file mode 100644 index 00000000..3271090a --- /dev/null +++ b/dashboard/config.json @@ -0,0 +1,8 @@ +{ + "database": "dbname", + "user": "username", + "host": "hostname", + "passw": "password", + "plotlyusername": "username", + "plotlyapikey": "apikey" +} \ No newline at end of file diff --git a/dashboard/dashboard.ipynb b/dashboard/dashboard.ipynb new file mode 100644 index 00000000..6d209a3d --- /dev/null +++ b/dashboard/dashboard.ipynb @@ -0,0 +1,455 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Active Learning dashboard" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/saakhta/anaconda3/lib/python3.7/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use \"pip install psycopg2-binary\" instead. For details see: .\n", + " \"\"\")\n" + ] + } + ], + "source": [ + "import json\n", + "import psycopg2\n", + "import pandas as pd\n", + "import plotly\n", + "import plotly.plotly as py\n", + "import plotly.graph_objs as go\n", + "\n", + "with open('config.json') as f:\n", + " conf = json.load(f)\n", + "conn_str = \"host={} dbname={} user={} password={}\".format(conf['host'], conf['database'], conf['user'], conf['passw'])\n", + "conn = psycopg2.connect(conn_str)\n", + "plotly.tools.set_credentials_file(username=conf['plotlyusername'], api_key=conf['plotlyapikey'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting a pie chart for current image tagging status:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "names = []\n", + "numbers = []\n", + "cursor = conn.cursor()\n", + "cursor.execute('select count(a.imageid), b.tagstatename from Image_Tagging_State a join tag_state b ON a.tagstateid = b.tagstateid group by b.tagstatename')\n", + "for (number, name) in cursor:\n", + " names.append(name)\n", + " numbers.append(number)\n", + "\n", + "fig = {\n", + " 'data': [{'labels': names,\n", + " 'values': numbers,\n", + " 'type': 'pie'}],\n", + " 'layout': {'title': 'Tag state by count'}\n", + " }\n", + "\n", + "py.iplot(fig)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting a time chart for the tagging activity over the last few days: " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "High five! You successfully sent some data to your account on plotly. 
View your plot in your browser at https://plot.ly/~samiyaakhtar/0 or inside your plot.ly account where it is named 'basic-scatter'\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates = []\n", + "numimages = []\n", + "cursor = conn.cursor()\n", + "cursor.execute(\"select count(imageid), date_trunc('day', modifieddtim) timerange from image_tagging_state_audit group by date_trunc('day', modifieddtim) order by timerange\")\n", + "for (numimage, date) in cursor:\n", + " x = pd.to_datetime(date)\n", + " dates.append(x)\n", + " numimages.append(numimage)\n", + "\n", + "# Create a trace\n", + "trace = go.Scatter(\n", + " x = dates,\n", + " y = numimages,\n", + " # mode = 'markers'\n", + " name = 'Number of tagging activities'\n", + ")\n", + "data = [trace]\n", + "layout = dict(title = 'Number of tagging activities by date', xaxis=dict(title='Date'), yaxis=dict(title='Tagging activities'))\n", + "fig = dict(data=data, layout=layout)\n", + "py.iplot(fig, filename='basic-scatter')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Top taggers " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cursor = conn.cursor()\n", + "usernames = []\n", + "tagcounts = []\n", + "cursor.execute(\"SELECT e.username, count(*) as TagCount FROM image_info a join Annotated_Labels b on a.imageid = b.imageid join user_info e on b.createdbyuser = e.userid group by e.username order by TagCount desc\")\n", + "for (username, tagcount) in cursor: \n", + " usernames.append(username)\n", + " tagcounts.append(tagcount)\n", + "\n", + "fig = {\n", + " 'data': [{'labels': usernames,\n", + " 'values': tagcounts,\n", + " 'type': 'pie'}],\n", + " 'layout': {'title': 'Top taggers by number of classifications'}\n", + " }\n", + "\n", + "py.iplot(fig)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_data = {}\n", + "cursor = conn.cursor()\n", + "cursor.execute(\"SELECT e.username, d.classificationname, count(*) as TagCount FROM image_info a join Annotated_Labels b on a.imageid = b.imageid join Annotated_Labels c on b.imagetagid = c.imagetagid join classification_info d on c.classificationid = d.classificationid join user_info e on b.createdbyuser = e.userid group by e.username, d.classificationname order by TagCount desc\")\n", + "for (username, classname, tagcount) in cursor:\n", + " if username not in class_data:\n", + " class_data[username] = {}\n", + " class_data[username]['classnames'] = []\n", + " class_data[username]['tagcount'] = []\n", + " class_data[username]['classnames'].append(classname)\n", + " class_data[username]['tagcount'].append(tagcount)\n", + " \n", + "data = []\n", + "for key in class_data:\n", + " trace = go.Bar(\n", + " x=class_data[key]['classnames'],\n", + " y=class_data[key]['tagcount'],\n", + " name=key\n", + " )\n", + " data.append(trace)\n", + "\n", + "layout = go.Layout(\n", + " barmode='stack',\n", + " title='Top taggers by classification info'\n", + ")\n", + "fig = go.Figure(data=data, 
layout=layout)\n", + "py.iplot(fig, filename='stacked-bar')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cursor = conn.cursor()\n", + "cursor.execute('SELECT b.classificationname, count(*) AS ClassificationCount FROM Annotated_Labels a join classification_info b on a.classificationid = b.classificationid group by classificationname order by ClassificationCount desc')\n", + "classnames = []\n", + "counts = []\n", + "for (classname, count) in cursor:\n", + " classnames.append(classname)\n", + " counts.append(count)\n", + "\n", + "trace = go.Bar(\n", + " x=classnames,\n", + " y=counts\n", + ")\n", + "data = [trace]\n", + "layout = dict(title = 'Top classifications', xaxis=dict(title='Classification'), yaxis=dict(title='Number of tags'))\n", + "fig = dict(data=data, layout=layout)\n", + "py.iplot(fig, filename='basic-plot')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cursor = conn.cursor()\n", + "cursor.execute('SELECT p.imageid, c.classificationname,p.boxconfidence, p.imageconfidence FROM Prediction_Labels p join classification_info c on c.classificationid = p.classificationid where trainingid = (SELECT MAX(trainingid) FROM training_info) order by boxconfidence ASC;')\n", + "classnames = []\n", + "counts = []\n", + "boxconfs = []\n", + "imageconfs = []\n", + "for (imageid, classname, boxconf, imageconf) in cursor:\n", + " classnames.append(classname)\n", + " boxconfs.append(boxconf)\n", + " imageconfs.append(imageconf)\n", + "\n", + "trace1 = go.Bar(\n", + " x=classnames,\n", + " y=boxconfs\n", + ")\n", + "\n", + "trace2 = go.Bar(\n", + " x=classnames,\n", + " y=imageconfs\n", + ")\n", + "data = [trace1, trace2]\n", + "layout = go.Layout(\n", + " barmode='group',\n", + " title = 'Least confident classifications', \n", + " xaxis=dict(title='Box confidence and Image confidence'), \n", + " yaxis=dict(title='Classnames')\n", + ")\n", + "fig = dict(data=data, layout=layout)\n", + "py.iplot(fig, filename='grouped-plot')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cursor = conn.cursor()\n", + "cursor.execute('SELECT p.imageid, c.classificationname,p.boxconfidence, p.imageconfidence FROM Prediction_Labels p join classification_info c on c.classificationid = p.classificationid where trainingid = (SELECT MAX(trainingid) FROM training_info) order by boxconfidence DESC;')\n", + "classnames = []\n", + "counts = []\n", + "boxconfs = []\n", + "imageconfs = []\n", + "for (imageid, classname, boxconf, imageconf) in cursor:\n", + " classnames.append(classname)\n", + " boxconfs.append(boxconf)\n", + " imageconfs.append(imageconf)\n", + "\n", + "trace1 = go.Bar(\n", + " x=classnames,\n", + " y=boxconfs\n", + ")\n", + "\n", + "trace2 = go.Bar(\n", + " x=classnames,\n", + " y=imageconfs\n", + ")\n", + "data = [trace1, trace2]\n", + "layout = go.Layout(\n", + " barmode='group',\n", + " title = 'Most confident classifications', \n", + " 
xaxis=dict(title='Box confidence and Image confidence'), \n", + " yaxis=dict(title='Classnames')\n", + ")\n", + "fig = dict(data=data, layout=layout)\n", + "py.iplot(fig, filename='grouped-plot')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~samiyaakhtar/0 or inside your plot.ly account where it is named 'basic-scatter'\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cursor = conn.cursor()\n", + "cursor.execute(\"SELECT count(c.classificationname), c.classificationname, date_trunc('day', t.createddtim) timerange FROM training_info t join class_performance p on t.trainingid = p.trainingid join classification_info c on c.classificationid = p.classificationid group by c.classificationname, timerange order by timerange desc;\")\n", + "classnames = []\n", + "counts = []\n", + "dates = []\n", + "for (count, classname, time) in cursor:\n", + " classnames.append(classname)\n", + " counts.append(count)\n", + " dates.append(time)\n", + " \n", + "\n", + "# Create a trace\n", + "trace = go.Scatter(\n", + " x = dates,\n", + " y = counts,\n", + " # mode = 'markers'\n", + " name = 'Class performance over time'\n", + ")\n", + "data = [trace]\n", + "layout = dict(title = 'Class performance over time', xaxis=dict(title='Date'), yaxis=dict(title='Class performance'))\n", + "fig = dict(data=data, layout=layout)\n", + "py.iplot(fig, filename='basic-scatter')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/db/README.md b/db/README.md new file mode 100644 index 00000000..c94fcf3f --- /dev/null +++ b/db/README.md @@ -0,0 +1,43 @@ +# Getting Starting With Active Learning Database Infrastructure + +This directory contains database schemas, deployment script, and test data generation. + +## Creating a PostgreSQL host on Azure + +The _Deploy-Postgres-DB_ shell script will deploy a [PostgreSQL server in Azure](https://azure.microsoft.com/en-us/services/postgresql/). The script assumes you have the [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest) installed and are already [logged into the CLI](https://docs.microsoft.com/en-us/cli/azure/authenticate-azure-cli?view=azure-cli-latest) with the subscription you wish to deploy to. + +Once the above is ready run the command below by replacing the 3 arguments + +```sh +$ sh ../devops/deployment/Deploy-Postgres-DB.sh RESOURCE_GROUP_NAME POSTGRES_SERVER_NAME POSTGRES_USER +``` + +By default PostgreSQL server on Azure will enforce SSL connections. 
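+Client code in this repository connects accordingly; for example, _install-db-resources.py_ (described below) opens its pg8000 connections with `ssl=True`. A minimal sketch of such a connection, assuming the DB_HOST/DB_USER/DB_PASS environment variables described in the next section:
+
+```python
+import os
+import pg8000
+
+# Sketch only: mirrors the connection call used by db/install-db-resources.py.
+conn = pg8000.connect(os.environ['DB_USER'], host=os.environ['DB_HOST'], port=5432,
+                      database='postgres', password=os.environ['DB_PASS'], ssl=True)
+```
+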
To disable SSL enforcement, set the following environment variable prior to deployment: + +```sh +$ export DISABLE_SSL_POSTGRES=true +``` + +## Deploying a PostgreSQL database and installing resources + +The _install-db-resources.py_ file will install SQL resources from the functions, tables, and triggers directories. A pre-requisite for installation is to set environment variables for **DB_HOST**, **DB_USER**, and **DB_PASS**. + +An example of setting the environment variables can be seen below: + +```sh +$ export DB_HOST=(POSTGRES_SERVER_NAME) DB_USER=(POSTGRES_USER@POSTGRES_SERVER) DB_PASS=(PASSWORD) +``` + +**Please note**: DB_PASS is the same password used when executing the _Deploy-Postgres-DB_ shell file. + +Now that the environment variables are set, execute the following, where _(MyDatabaseName)_ is replaced with the name of the PostgreSQL database you want to create on the existing host. + +```sh +$ python3 install-db-resources.py (MyDatabaseName) +``` + +If all is successful you will see a list of the installed files. + +## Running an integration test on PostgreSQL DB on Azure + +TODO \ No newline at end of file diff --git a/db/data/seed_tag_states.sql b/db/data/seed_tag_states.sql new file mode 100644 index 00000000..8999dad2 --- /dev/null +++ b/db/data/seed_tag_states.sql @@ -0,0 +1,8 @@ +-- Set up the states +INSERT INTO Tag_State VALUES + (0, 'Not Ready'), + (1, 'Ready To Tag'), + (2, 'Tag In Progress'), + (3, 'Completed Tag'), + (4, 'Incomplete Tag'), + (5, 'Abandoned'); diff --git a/db/functions/log_image_info_insert.sql b/db/functions/log_image_info_insert.sql new file mode 100644 index 00000000..d5d55614 --- /dev/null +++ b/db/functions/log_image_info_insert.sql @@ -0,0 +1,12 @@ +-- On insert of new image info rows we automatically create an entry in the state table +CREATE OR REPLACE FUNCTION log_image_info_insert() + RETURNS trigger AS + ' + BEGIN + INSERT INTO Image_Tagging_State(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,CreatedDtim) + VALUES(NEW.ImageId,0,NEW.CreatedByUser,current_timestamp,current_timestamp); + + RETURN NEW; + END; + ' + LANGUAGE plpgsql; \ No newline at end of file diff --git a/db/functions/log_image_tagging_state_changes.sql b/db/functions/log_image_tagging_state_changes.sql new file mode 100644 index 00000000..85580d51 --- /dev/null +++ b/db/functions/log_image_tagging_state_changes.sql @@ -0,0 +1,14 @@ +-- ActionFlag: 1 = insert, 2 = update, 3 = delete +CREATE OR REPLACE FUNCTION log_image_tagging_state_changes() + RETURNS trigger AS + ' + BEGIN + IF NEW.TagStateId <> OLD.TagStateId THEN + INSERT INTO Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,ArchiveDtim,ActionFlag) + VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedByUser,NEW.ModifiedDtim,current_timestamp,2); + END IF; + + RETURN NEW; + END; + ' + LANGUAGE plpgsql; \ No newline at end of file diff --git a/db/functions/log_image_tagging_state_insert.sql b/db/functions/log_image_tagging_state_insert.sql new file mode 100644 index 00000000..366e67a4 --- /dev/null +++ b/db/functions/log_image_tagging_state_insert.sql @@ -0,0 +1,12 @@ +-- ActionFlag: 1 = insert, 2 = update, 3 = delete +CREATE OR REPLACE FUNCTION log_image_tagging_state_insert() + RETURNS trigger AS + ' + BEGIN + INSERT INTO Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,ArchiveDtim,ActionFlag) + VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedByUser,NEW.ModifiedDtim,current_timestamp,1); + + RETURN NEW; + END; + ' + LANGUAGE plpgsql; \ No newline at end of file diff --git 
a/db/install-db-resources.py b/db/install-db-resources.py new file mode 100755 index 00000000..6dfbbb89 --- /dev/null +++ b/db/install-db-resources.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +import pg8000 +import os +import sys +import collections +import argparse +from os import listdir +from os.path import isfile, join + +default_postgres_db_name = "postgres" + +def read_file_as_string(local_file_name): + data = None + with open(local_file_name, 'r') as myfile: + data = myfile.read() + if not data: + print("The file {0} is empty. ".format(local_file_name)) + return data + +def execute_queries_from_map(conn, file_query_map): + cursor = conn.cursor() + if(len(file_query_map)>0): + print("Installed: \n") + for file_path,query in file_query_map.items(): + cursor.execute(query) + conn.commit() + print("\t{0}".format(file_path)) + return + +def database_exists(conn, db_name): + if db_name: + cursor = conn.cursor() + query = "SELECT 1 FROM pg_database WHERE datname=%s" + cursor.execute(query,(db_name,)) + row = cursor.fetchone() + if row: + return int(row[0]) == 1 + return False + +def create_database(conn, db_name): + if db_name: + cursor = conn.cursor() + conn.autocommit = True + query = "CREATE DATABASE {0};" + print("\nAttempting to create database '{0}'...This may take up to 30 seconds".format(db_name)) + cursor.execute(query.format(db_name)) + print("Successfully created database named '{0}'".format(db_name)) + else: + print("No database created due to empty parameter") + return + +def remove_database(conn, db_name): + if db_name: + cursor = conn.cursor() + conn.autocommit = True + query = "DROP DATABASE {0};" + print("\nAttempting to drop database '{0}'...This may take up to 30 seconds".format(db_name)) + cursor.execute(query.format(db_name)) + print("Successfully dropped database named '{0}'".format(db_name)) + else: + print("No database dropped due to empty parameter") + return + +def install_extensions(conn, list_of_extensions): + if (len(list_of_extensions) > 0): + cursor = conn.cursor() + conn.autocommit = True + for ext in list_of_extensions: + query = "CREATE EXTENSION {0};" + cursor.execute(query.format(ext)) + print("Installed extension named '{0}'".format(ext)) + else: + print("No extensions to install") + return + +def get_connection(): + return __new_postgres_connection(os.environ['DB_HOST'],os.environ['DB_NAME'],os.environ['DB_USER'],os.environ['DB_PASS']) + +def __new_postgres_connection(host_name,db_name,db_user,db_pass): + return pg8000.connect(db_user, host=host_name, unix_sock=None, port=5432, database=db_name, password=db_pass, ssl=True, timeout=None, application_name=None) + +def get_file_query_map(sub_dir_name): + dirname = os.path.dirname(__file__) + full_sub_dir_path = os.path.join(dirname, sub_dir_name) + sub_dir_scripts = [join(full_sub_dir_path, f) for f in listdir(full_sub_dir_path) if isfile(join(full_sub_dir_path, f))] + file_query_map = {f:read_file_as_string(f) for f in sub_dir_scripts} + return file_query_map + +def get_default_connection(): + return __new_postgres_connection(os.environ['DB_HOST'],default_postgres_db_name,os.environ['DB_USER'],os.environ['DB_PASS']) + +def get_connection_for_db(db_name): + return __new_postgres_connection(os.environ['DB_HOST'],db_name,os.environ['DB_USER'],os.environ['DB_PASS']) + +def execute_files_in_dir_list(conn,list_of_sub_dirs): + for sub_dir in list_of_sub_dirs: + print("\n****\tReading files in '{0}' directory\t****\n".format(sub_dir)) + file_query_map = get_file_query_map(sub_dir) + file_query_map = 
collections.OrderedDict(sorted(file_query_map.items())) + if '' in file_query_map.values(): + print("One of the files is empty. Please fix") + return + execute_queries_from_map(conn,file_query_map) + +def main(db_name, overwrite_db): + try: + if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_PASS") is None): + print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") + return + + if (database_exists(get_default_connection(), db_name) and overwrite_db): + remove_database(get_default_connection(),db_name) + elif (database_exists(get_default_connection(), db_name) and not overwrite_db): + print("Database {0} already exists. Please see --help for overwrite option.".format(db_name)) + return + + #Set up the database + create_database(get_default_connection(),db_name) + + #Install extensions + install_extensions(get_connection_for_db(db_name),['citext']) + + #Connect to the new database and install resources + conn = get_connection_for_db(db_name) + sub_dirs = ["tables","functions","triggers","data"] + execute_files_in_dir_list(conn,sub_dirs) + + print("Done!") + except Exception as e: + print(e) + #traceback.print_exc() + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('database_name', type=str, + help='The name of the database to create and install resources on') + + parser.add_argument('-o','--overwrite', action='store_true', + help='Will drop and restore a database if it already exists') + + args = parser.parse_args() + database_name = args.database_name + main(args.database_name,args.overwrite) diff --git a/db/postgres-client.py b/db/postgres-client.py new file mode 100644 index 00000000..a39d36b6 --- /dev/null +++ b/db/postgres-client.py @@ -0,0 +1,180 @@ +import sys +import string +import pg8000 +import random +import os +import time +import logging +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from functions.pipeline.shared.db_access import ImageTagDataAccess +from functions.pipeline.shared.db_provider import PostGresProvider, DatabaseInfo +from functions.pipeline.shared.db_access.db_access_v2 import generate_test_image_infos + +def get_transformed_id_to_url_map(id_to_url_map): + updated_image_id_url_map = {} + for image_id, old_url in id_to_url_map.items(): + replaced_path = old_url.replace('new-uploads','perm-uploads') + file_name_to_replace = extract_image_name_no_suffix(replaced_path) + transformed_path = replaced_path.replace(file_name_to_replace,str(image_id)) + updated_image_id_url_map[image_id] = transformed_path + return updated_image_id_url_map + +def pretty_print_audit_history(conn, list_of_image_ids): + if(len(list_of_image_ids) > 0): + cursor = conn.cursor() + image_ids_as_strings = [str(i) for i in list_of_image_ids] + images_to_audit = '{0}'.format(', '.join(image_ids_as_strings)) + query = ("SELECT a.imageid,c.originalimagename, b.tagstatename, d.username, a.ArchiveDtim FROM image_tagging_state_audit a " + "JOIN tagstate b ON a.tagstateid = b.tagstateid " + "JOIN image_info c on a.imageid = c.imageid " + "JOIN user_info d on a.modifiedbyuser = d.userid " + "WHERE a.ImageId in ({0}) " + "ORDER BY a.ImageId,ArchiveDtim ASC") + cursor.execute(query.format(images_to_audit)) + row = cursor.fetchone() + print() + if(row != None): + print("ImageId\tImgName\tTagState\tUser\tLoggedTime") + while row: + print("{0}\t{1}\t{2}\t{3}\t{4}".format(str(row[0]),str(row[1]),str(row[2]),str(row[3]),str(row[4]))) + row = cursor.fetchone() + else: + print("No images!") + +def 
extract_image_name_no_suffix(url): + start_idx = url.rfind('/')+1 + end_idx = url.rfind('.') + return url[start_idx:end_idx] + +def extract_image_id_from_urls(list_of_image_urls): + extracted_image_ids = [] + for url in list_of_image_urls: + extracted_id = int(extract_image_name_no_suffix(url)) + extracted_image_ids.append(extracted_id) + return extracted_image_ids + +def main(num_of_images,user_name): + try: + if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_NAME") is None or os.getenv("DB_PASS") is None): + print("Please set environment variables for DB_HOST, DB_USER, DB_NAME, DB_PASS") + return + + if(num_of_images < 5 or num_of_images > 20): + print("Number of images should be between 5 and 20") + return + + if(not user_name): + print("User name cannot be empty or whitespace") + return + ################################################################# + # Below we simulate the following scenarios: + # Creating a User + # Onboarding of new images + # Checking out images to tag + # Checking in images that have or have not been tagged + ################################################################# + + db_config = DatabaseInfo(os.getenv("DB_HOST"),os.getenv("DB_NAME"),os.getenv("DB_USER"),os.getenv("DB_PASS")) + pg = PostGresProvider(db_config) + data_access = ImageTagDataAccess(pg) + user_id = data_access.create_user(user_name) + + NUMBER_OF_IMAGES = num_of_images + + # Simulate new images from VOTT getting created in some blob store + mocked_images = generate_test_image_infos(NUMBER_OF_IMAGES) + print() + print("***\tSubject matter experts use the CLI to upload new images...") + time.sleep(1) + print() + # Simulate the data access layer creating entries in the DB for the new images + # and returning a map of the original image url to generaled image id + url_to_image_id_map = data_access.add_new_images(mocked_images, user_id) + print() + + print("***\tBehind the scenes Az Functions move the images to a new blob location") + time.sleep(1) + print() + #Invert the above map since the client will now be using the image id as a key + image_to_url = {v: k for k, v in url_to_image_id_map.items()} + + # Simulates when the client has moved images to a new blob store container + # and creates a payload for the data access layer with a map for image id to new urls + updated_image_id_url_map = get_transformed_id_to_url_map(image_to_url) + + # Simulates the call the client makes to the data access layer + # with the new payload. Image urls get updated in the DB + data_access.update_image_urls(updated_image_id_url_map, user_id) + + print() + print("***\tThe newly uploaded images are now onboarded with a 'ready to tag' state. See audit history") + print() + time.sleep(1) + + # Prints the audit history of the generated of all the newly onboarded + # images involved in the simulation to prove the state tracking for onboarding. + image_ids = list(updated_image_id_url_map.keys()) + pretty_print_audit_history(pg.get_connection(),image_ids) + time.sleep(3) + print() + + print("***\tSubject matter experts use the CLI to retrieve images in a 'ready to tag' state") + time.sleep(2) + print() + + list_of_image_urls = data_access.get_images_for_tagging(NUMBER_OF_IMAGES, user_id) + print() + print("***\tLet's wait for image taggers to get through the set of images....") + time.sleep(5) + print() + print("***\tDone! 
Though the subject matter experts didn't complete tagging all images") + time.sleep(2) + print() + + ''' + print("***\tRegardless the SMEs use the CLI to post the VOTT json results") + print() + # Since we rename the original image name to a integer that matchs the DB image id + # we need to extract out the image ids. Below this code is simulates extracting + # image ids from the VOTT JSON + extracted_image_ids = extract_image_id_from_urls(list_of_image_urls) + + # Let assume 3 images got tagged and 2 images did not. The client will + # call corresponding methods to update tagged and untagged states + completed_tagged_ids = [] + incomplete_tagged_ids = [] + num_of_incomplete = NUMBER_OF_IMAGES/5 + for idx, img_id in enumerate(extracted_image_ids): + if(idx > num_of_incomplete): + completed_tagged_ids.append(img_id) + else: + incomplete_tagged_ids.append(img_id) + + data_access.update_tagged_images(completed_tagged_ids,user_id) + data_access.update_incomplete_images(incomplete_tagged_ids,user_id) + + print() + print("***\tVOTT json results are posted. Lets take a look at the audit history") + time.sleep(2) + # Finally lets look at the audit history again. We expect to see some images as tagged + # and some as incomplete + print() + pretty_print_audit_history(pg.get_connection(),image_ids) + + print() + print("Success!") + ''' + #__verify_connect_to_db(get_connection()) + #get_unvisited_items(get_connection(),count_of_images) + except Exception as e: print(e) + +if __name__ == "__main__": + #print(sys.path) + console = logging.StreamHandler() + log = logging.getLogger() + log.setLevel(logging.getLevelName('DEBUG')) + log.addHandler(console) + if (len(sys.argv) != 3): + print("Usage: {0} (Number of Images) (User Name)".format(sys.argv[0])) + else: + main(int(sys.argv[1]), str(sys.argv[2])) diff --git a/db/tables/000_classification_info.sql b/db/tables/000_classification_info.sql new file mode 100644 index 00000000..cdf70e20 --- /dev/null +++ b/db/tables/000_classification_info.sql @@ -0,0 +1,7 @@ +-- Set up table +CREATE TABLE Classification_Info ( + ClassificationId SERIAL PRIMARY KEY, + ClassificationName citext NOT NULL UNIQUE, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); diff --git a/db/tables/000_image_tagging_state_audit.sql b/db/tables/000_image_tagging_state_audit.sql new file mode 100644 index 00000000..9723398f --- /dev/null +++ b/db/tables/000_image_tagging_state_audit.sql @@ -0,0 +1,9 @@ +CREATE TABLE Image_Tagging_State_Audit ( + RowId serial primary key, + ImageId integer NOT NULL, + TagStateId integer NOT NULL, + ModifiedByUser integer NOT NULL, + ModifiedDtim timestamp NOT NULL, + ArchiveDtim timestamp NOT NULL, + ActionFlag integer NOT NULL +); \ No newline at end of file diff --git a/db/tables/000_tag_state.sql b/db/tables/000_tag_state.sql new file mode 100644 index 00000000..2c77972d --- /dev/null +++ b/db/tables/000_tag_state.sql @@ -0,0 +1,5 @@ +-- Set up table +CREATE TABLE Tag_State ( + TagStateId integer PRIMARY KEY, + TagStateName text NOT NULL +); \ No newline at end of file diff --git a/db/tables/000_user_info.sql b/db/tables/000_user_info.sql new file mode 100644 index 00000000..a6f7b689 --- /dev/null +++ b/db/tables/000_user_info.sql @@ -0,0 +1,7 @@ +-- Simple User table +CREATE TABLE User_Info ( + UserId SERIAL PRIMARY KEY, + UserName citext NOT NULL UNIQUE, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No 
newline at end of file diff --git a/db/tables/010_image_info.sql b/db/tables/010_image_info.sql new file mode 100644 index 00000000..b30ebfc1 --- /dev/null +++ b/db/tables/010_image_info.sql @@ -0,0 +1,11 @@ +-- Set up table and autoincrementing primary key +CREATE TABLE Image_Info ( + ImageId SERIAL PRIMARY KEY, + OriginalImageName text NOT NULL, + ImageLocation text, + Height integer NOT NULL, + Width integer NOT NULL, + CreatedByUser integer REFERENCES User_Info(UserId), + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No newline at end of file diff --git a/db/tables/010_training_info.sql b/db/tables/010_training_info.sql new file mode 100644 index 00000000..bd0503ce --- /dev/null +++ b/db/tables/010_training_info.sql @@ -0,0 +1,11 @@ +CREATE TABLE Training_Info ( + TrainingId SERIAL PRIMARY KEY, + TrainingDescription text, + ModelLocation text NOT NULL UNIQUE, + ClassPerfAvg decimal(6,5) NOT NULL, + --Consider additional metadata like a path to zip file + --containing the pipeline.config, model, etc. + CreatedByUser integer REFERENCES User_Info(UserId), + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); diff --git a/db/tables/100_annotated_labels.sql b/db/tables/100_annotated_labels.sql new file mode 100644 index 00000000..405da5dc --- /dev/null +++ b/db/tables/100_annotated_labels.sql @@ -0,0 +1,14 @@ +-- Set up table +CREATE TABLE Annotated_Labels ( + ImageTagId SERIAL UNIQUE, + ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, + ClassificationId integer REFERENCES Classification_Info(ClassificationId), + X_Min decimal(6,2) NOT NULL, + X_Max decimal(6,2) NOT NULL, + Y_Min decimal(6,2) NOT NULL, + Y_Max decimal(6,2) NOT NULL, + CreatedByUser integer REFERENCES User_Info(UserId), + CreatedDtim timestamp NOT NULL default current_timestamp, + --VOTT_Data json NOT NULL + PRIMARY KEY (ImageId,ClassificationId,X_Min,X_Max,Y_Min,Y_Max) --Should we include the bounded box as well? 
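+    -- Note: the composite key above already spans the box coordinates, so an exact
+    -- duplicate box for the same image and class is rejected.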
+); \ No newline at end of file diff --git a/db/tables/100_class_performance.sql b/db/tables/100_class_performance.sql new file mode 100644 index 00000000..a063868e --- /dev/null +++ b/db/tables/100_class_performance.sql @@ -0,0 +1,6 @@ +CREATE TABLE Class_Performance ( + TrainingId integer REFERENCES Training_Info(TrainingId), + ClassificationId integer REFERENCES Classification_Info(ClassificationId), + AvgPerf decimal(6,5) NOT NULL, + PRIMARY KEY (TrainingId,ClassificationId) +); \ No newline at end of file diff --git a/db/tables/100_image_tagging_state.sql b/db/tables/100_image_tagging_state.sql new file mode 100644 index 00000000..0aad85ce --- /dev/null +++ b/db/tables/100_image_tagging_state.sql @@ -0,0 +1,8 @@ +-- Set up table and autoincrementing primary key +CREATE TABLE Image_Tagging_State ( + ImageId integer REFERENCES Image_Info(ImageId), + TagStateId integer REFERENCES Tag_State(TagStateId), + ModifiedByUser integer REFERENCES User_Info(UserId), + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No newline at end of file diff --git a/db/tables/110_prediction_labels.sql b/db/tables/110_prediction_labels.sql new file mode 100644 index 00000000..04af9076 --- /dev/null +++ b/db/tables/110_prediction_labels.sql @@ -0,0 +1,12 @@ +CREATE TABLE Prediction_Labels ( + TrainingId integer REFERENCES Training_Info(TrainingId), + ImageId integer REFERENCES Image_Info(ImageId), + ClassificationId integer REFERENCES Classification_Info(ClassificationId), + X_Min decimal(6,2) NOT NULL, + X_Max decimal(6,2) NOT NULL, + Y_Min decimal(6,2) NOT NULL, + Y_Max decimal(6,2) NOT NULL, + BoxConfidence decimal(5,4) NOT NULL, + ImageConfidence decimal(5,4) NOT NULL, + PRIMARY KEY (TrainingId,ImageId,ClassificationId,X_Min,X_Max,Y_Min,Y_Max) +); diff --git a/db/triggers/image_info_insert.sql b/db/triggers/image_info_insert.sql new file mode 100644 index 00000000..6ff15de4 --- /dev/null +++ b/db/triggers/image_info_insert.sql @@ -0,0 +1,5 @@ +--DROP TRIGGER IF EXISTS image_info_insert ON Image_Info; +CREATE TRIGGER image_info_insert + AFTER INSERT ON Image_Info + FOR EACH ROW + EXECUTE PROCEDURE log_image_info_insert(); \ No newline at end of file diff --git a/db/triggers/image_tagging_state_changes.sql b/db/triggers/image_tagging_state_changes.sql new file mode 100644 index 00000000..c4677fe7 --- /dev/null +++ b/db/triggers/image_tagging_state_changes.sql @@ -0,0 +1,5 @@ +--DROP TRIGGER IF EXISTS image_tagging_state_changes ON Image_Tagging_State; +CREATE TRIGGER image_tagging_state_changes + BEFORE UPDATE ON Image_Tagging_State + FOR EACH ROW + EXECUTE PROCEDURE log_image_tagging_state_changes(); diff --git a/db/triggers/image_tagging_state_insert.sql b/db/triggers/image_tagging_state_insert.sql new file mode 100644 index 00000000..a917c8eb --- /dev/null +++ b/db/triggers/image_tagging_state_insert.sql @@ -0,0 +1,5 @@ +--DROP TRIGGER IF EXISTS image_tagging_state_insert ON Image_Tagging_State; +CREATE TRIGGER image_tagging_state_insert + AFTER INSERT ON Image_Tagging_State + FOR EACH ROW + EXECUTE PROCEDURE log_image_tagging_state_insert(); \ No newline at end of file diff --git a/devops/deployment/Deploy-AppInsights.sh b/devops/deployment/Deploy-AppInsights.sh new file mode 100755 index 00000000..3c85d46d --- /dev/null +++ b/devops/deployment/Deploy-AppInsights.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +#Exit on error +set -e + +ResourceGroup=$1 +AppInsightsName=$2 + +# Check if any of the args are empty +if [ -z "$1" ] || [ -z "$2" ]; 
then + echo "Usage: 'sh $0 (Azure Resource Group Name) (AppInsights Name)'" + exit 1 +fi + +az resource create \ + --resource-group $ResourceGroup \ + --resource-type "Microsoft.Insights/components" \ + --name $AppInsightsName \ + --location WestUS2 \ + --properties '{"Application_Type":"other", "Flow_Type":"Redfield", "Request_Source":"IbizaAIExtension","HockeyAppId": null,"SamplingPercentage": null}' \ No newline at end of file diff --git a/devops/deployment/Deploy-Infrastructure.sh b/devops/deployment/Deploy-Infrastructure.sh new file mode 100755 index 00000000..06ede888 --- /dev/null +++ b/devops/deployment/Deploy-Infrastructure.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +# Check if we are in a virutal env (needed to deploy functions, database) +if [ -z "$VIRTUAL_ENV" ]; then + echo "A virtual environment using Python 3.6 is needed. Please make sure Python 3.6" + echo "is installed and the virtual environment is created with the appropriate command" + echo "which would be something like: python3.6 -m venv " + echo "" + echo "You will then need to activate the venv, which on *nix based systems would be:" + echo " source /bin/activate" + exit 1 +fi + +# Check if the python version is 3.6 +#python --version | awk '{print $2}' | grep "^3.6" >& /dev/null +#if [ "$?" -ne "0" ]; then +# echo "Python version 3.6.x is required." +# exit 1 +#fi + +# Check if any of the args are empty +if [ -z "$1" ]; then + echo "Usage: 'sh $0 (Configuration file)' or SET Environment Variables" +fi + +# If arg exists but the config file isn't present? +if [ -n "$1" ] && [ ! -e "$1" ]; then + echo "Configuration file does not exist." + exit 1 +elif [ -e "$1" ]; then + # Read configuration + . $1 +fi + +#Verify env vars are set +[ -z "$RESOURCE_GROUP" ] && echo "Need to set RESOURCE_GROUP" && exit 1; +[ -z "$RESOURCE_LOCATION" ] && echo "Need to set RESOURCE_LOCATION" && exit 1; +[ -z "$PROJECT_STORAGE_ACCOUNT" ] && echo "Need to set PROJECT_STORAGE_ACCOUNT" && exit 1; +[ -z "$PROJECT_STORAGE_TEMP_CONTAINER" ] && echo "Need to set PROJECT_STORAGE_TEMP_CONTAINER" && exit 1; +[ -z "$PROJECT_STORAGE_PERM_CONTAINER" ] && echo "Need to set PROJECT_STORAGE_PERM_CONTAINER" && exit 1; +[ -z "$DATABASE_NAME" ] && echo "Need to set DATABASE_NAME" && exit 1; +[ -z "$DATABASE_SERVER_NAME" ] && echo "Need to set DATABASE_SERVER_NAME" && exit 1; +[ -z "$DATABASE_USERNAME" ] && echo "Need to set DATABASE_USERNAME" && exit 1; +[ -z "$DATABASE_PASSWORD" ] && echo "Need to set DATABASE_PASSWORD" && exit 1; +[ -z "$APPINSIGHTS_NAME" ] && echo "Need to set APPINSIGHTS_NAME" && exit 1; +[ -z "$FUNCTION_STORAGE_ACCOUNT" ] && echo "Need to set FUNCTION_STORAGE_ACCOUNT" && exit 1; +[ -z "$FUNCTION_APP_NAME" ] && echo "Need to set FUNCTION_APP_NAME" && exit 1; + +# Install reuired python modules +pip install -r ../../requirements.txt + +#Conditional Postgres Server deployment to speed up scheduled automated deploys +DEPLOY_POSTGRES_SERVER=${DEPLOY_POSTGRES:="true"} + +# Setup database +DATABASE_USERNAME_AT_HOST="$DATABASE_USERNAME@$DATABASE_SERVER_NAME" + +#Only skip the deploy if the server exists and we are configured not to deploy +query_result=$(az postgres server list --query "[?name=='$DATABASE_SERVER_NAME'].name") +if [[ $query_result =~ $DATABASE_SERVER_NAME ]] && ! $DEPLOY_POSTGRES_SERVER; +then + echo && echo "Skipping deployment of PostgreSQL server $DATABASE_SERVER_NAME" && echo +else + #First see if the postgres server exists. 
+ ps_query_result=$(az postgres server list -g $RESOURCE_GROUP --query "[?name=='$DATABASE_SERVER_NAME'].name") + if [[ $ps_query_result =~ $DATABASE_SERVER_NAME ]]; + then + echo "Postgres server $DATABASE_SERVER_NAME already exists. Removing..." + az postgres server delete -g $RESOURCE_GROUP -n $DATABASE_SERVER_NAME -y + fi + echo "Entering deployment of PostgreSQL server $DATABASE_SERVER_NAME" + . ./Deploy-Postgres-DB.sh $RESOURCE_GROUP $DATABASE_SERVER_NAME "$DATABASE_USERNAME" $DATABASE_PASSWORD + if [ "$?" -ne 0 ]; then + echo "Unable to setup database" + exit 1 + fi +fi + +# Setup database schema +echo "Installing of database resources to PostgreSQL server $DATABASE_SERVER_NAME" +DB_HOST_FULL_NAME="$DATABASE_SERVER_NAME"".postgres.database.azure.com" +(cd ../../db && export DB_HOST=$DB_HOST_FULL_NAME && export DB_USER="$DATABASE_USERNAME_AT_HOST" && export DB_PASS=$DATABASE_PASSWORD && ./install-db-resources.py --overwrite $DATABASE_NAME) + +# Setup app insights +. ./Deploy-AppInsights.sh $RESOURCE_GROUP $APPINSIGHTS_NAME +if [ "$?" -ne 0 ]; then + echo "Unable to setup app insights" + exit 1 +fi + +# Setup storage assets needed by functions +export RESOURCE_GROUP=$RESOURCE_GROUP +export STORAGE_NAME=$PROJECT_STORAGE_ACCOUNT +export STORAGE_TEMP_CONTAINER=$PROJECT_STORAGE_TEMP_CONTAINER +export STORAGE_PERM_CONTAINER=$PROJECT_STORAGE_PERM_CONTAINER +./Deploy-Storage.sh +if [ "$?" -ne 0 ]; then + echo "Unable to create storage accounts and containers" + exit 1 +fi + +STORAGE_CONNECTION_STRING=$(az storage account show-connection-string -n $PROJECT_STORAGE_ACCOUNT -g $RESOURCE_GROUP --query "connectionString") + +# Setup azure python function +PROJECT_STORAGE_ACCOUNT_KEY=$(az storage account keys list -n $PROJECT_STORAGE_ACCOUNT --query [0].value --resource-group $RESOURCE_GROUP) +. ./Deploy-Python-Functions-App.sh \ + $RESOURCE_GROUP \ + $FUNCTION_STORAGE_ACCOUNT \ + $FUNCTION_APP_NAME \ + $APPINSIGHTS_NAME \ + $PROJECT_STORAGE_ACCOUNT \ + $PROJECT_STORAGE_ACCOUNT_KEY \ + $PROJECT_STORAGE_TEMP_CONTAINER \ + $PROJECT_STORAGE_PERM_CONTAINER \ + $DB_HOST_FULL_NAME \ + $DATABASE_USERNAME_AT_HOST \ + $DATABASE_PASSWORD \ + $DATABASE_NAME \ + $STORAGE_CONNECTION_STRING +if [ "$?" -ne 0 ]; then + echo "Unable to setup app insights" + exit 1 +fi + +. ./Deploy-Pipeline-Functions.sh $FUNCTION_APP_NAME ../../functions/pipeline +if [ "$?" -ne 0 ]; then + echo "Unable to deploy pipeline functions" + exit 1 +fi \ No newline at end of file diff --git a/devops/deployment/Deploy-Pipeline-Functions.sh b/devops/deployment/Deploy-Pipeline-Functions.sh new file mode 100644 index 00000000..be8c1c8a --- /dev/null +++ b/devops/deployment/Deploy-Pipeline-Functions.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Check if any of the args are empty +if [ -z "$1" ] || [ -z "$2" ]; then + echo "Usage: 'sh $0 (Azure function app name) (Function directory)'" + exit 1 +fi + +# Check that the function directory exists +if [ ! -e "$2" ]; then + echo "Function directory does not exist -- $2" + exit 1 +fi + +(cd $2 && func azure functionapp publish $1 --force --build-native-deps --no-bundler) +if [ "$?" 
-ne 0 ]; then + echo "Error deploying pipeline functions" + exit 1 +fi \ No newline at end of file diff --git a/devops/deployment/Deploy-Postgres-DB.sh b/devops/deployment/Deploy-Postgres-DB.sh new file mode 100755 index 00000000..2b42d24b --- /dev/null +++ b/devops/deployment/Deploy-Postgres-DB.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +#Exit on error +set -e + +ResourceGroup=$1 +ServerName=$2 +DBUserName=$3 +DBPassword=$4 +Local_IP_Address=$(curl -s http://whatismyip.akamai.com/) + +# Check if any of the args are empty +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + echo "Usage: 'sh $0 (Azure Resource Group Name) (PostGres ServerName) (PostGres UserName) (PostGres Password)'" + exit 1 +fi + +# See Azure password policy: https://docs.microsoft.com/en-us/previous-versions/azure/jj943764(v=azure.100) +PasswordLength=${#DBPassword} +if [ $PasswordLength -lt 8 -o $PasswordLength -gt 16 ]; then + echo "Password must be between 8 to 16 characters" + exit 1 +fi + +if [[ "$DBPassword" != *[A-Z]* || "$DBPassword" != *[a-z]* ]]; then + echo "Password must have upper and lower case characters" + exit 1 +fi + +if [[ "$DBPassword" != *[0-9]* ]]; then + echo "Password must contain numbers" + exit 1 +fi + +echo +echo "Create a resource group (if it does not exist for the current subscription)" +echo +az group create \ + --name $ResourceGroup \ + --location westus + +echo +echo "Create an Azure Postgres host on the cheapest SKU. This may take SEVERAL MINUTES..." +echo +az postgres server create \ + --resource-group $ResourceGroup \ + --name $ServerName \ + --location westus \ + --admin-user $DBUserName \ + --admin-password $DBPassword \ + --sku-name B_Gen5_2 \ + --version 9.6 \ + +echo +echo "Create a firewall rule for the local host IP address $Local_IP_Address" +echo +RuleDate=$(date +%F_%H-%M-%S) +az postgres server firewall-rule create \ + --resource-group $ResourceGroup \ + --server-name $ServerName \ + --name "AllowMyIP_$RuleDate" \ + --start-ip-address $Local_IP_Address \ + --end-ip-address $Local_IP_Address + +echo +echo "Create a firewall rule for Azure services" +echo +RuleDate=$(date +%F_%H-%M-%S) +az postgres server firewall-rule create \ + --resource-group $ResourceGroup \ + --server-name $ServerName \ + --name "AzureServices_$RuleDate" \ + --start-ip-address "0.0.0.0" \ + --end-ip-address "0.0.0.0" + +SSL_POSTGRES=${DISABLE_SSL_POSTGRES:="false"} +if $SSL_POSTGRES; then + echo + echo "WARNING: Disabling SSL enforcement on Postgres host." 
+ echo + az postgres server update --resource-group $ResourceGroup --name $ServerName --ssl-enforcement Disabled +fi diff --git a/devops/deployment/Deploy-Python-Functions-App.sh b/devops/deployment/Deploy-Python-Functions-App.sh new file mode 100755 index 00000000..32747ec8 --- /dev/null +++ b/devops/deployment/Deploy-Python-Functions-App.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +#Automation based from instructions hre: https://github.com/Azure/Azure-Functions/wiki/Azure-Functions-on-Linux-Preview + +#Exit on error +set -e + +ResourceGroup=$1 +StorageName=$2 +FunctionAppName=$3 +AppInsightsName=$4 + +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ] || [ -z "$6" ] || [ -z "$7" ] || [ -z "$8" ] || [ -z "$9" ] || [ -z "${10}" ] || [ -z "${11}" ] || [ -z "${12}" ] || [ -z "${13}" ]; then + echo "Usage: sh $0 (Azure Resource Group Name) (Azure Function Storage Name) (Azure Function App Name) (AppInsightsName) (Storage account) (Source container) (Dest container) (DB Server Name) (DB Username) (DB Password) (DB Name) (Storage Connection String)" + exit 1 +fi + +STORAGE_ACCOUNT_NAME="$5" +STORAGE_ACCOUNT_KEY="$6" +SOURCE_CONTAINER_NAME="$7" +DESTINATION_CONTAINER_NAME="$8" +DB_HOST="$9" +DB_USER="${10}" +DB_PASS="${11}" +DB_NAME="${12}" +STORAGE_CONNECTION_STRING="${13}" + +StorageNameLength=${#StorageName} +if [ $StorageNameLength -lt 3 -o $StorageNameLength -gt 24 ]; then + echo "Storage account name must be between 3 and 24 characters in length." + exit 1 +fi + +if [[ "$StorageName" != *[a-z0-9]* ]]; then + echo "Storage account name must use numbers and lower-case letters only" + exit 1 +fi + +# See http://jmespath.org/tutorial.html for querying +filtered_output=$(az extension list --query "[?name=='functionapp'].name") + +if [[ $filtered_output =~ "functionapp" ]]; +then + echo + echo "Removing existng Azure CLI extension..." 
+ az extension remove -n functionapp +fi + +TempDownloadLocation="/tmp/functionapp-0.0.2-py2.py3-none-any.whl" + +echo +echo "Downloading Azure CLI extension for the Azure Functions Linux Consumption preview" +echo +curl -s -o $TempDownloadLocation "https://functionscdn.azureedge.net/public/docs/functionapp-0.0.2-py2.py3-none-any.whl" + +echo +echo "Installing Azure CLI extension for the Azure Functions Linux Consumption preview" +echo +az extension add --yes --source $TempDownloadLocation + +echo +echo "Create a resource group (if it does not exist for the current subscription)" +echo +az group create -n $ResourceGroup -l "WestUS" + +echo +echo "Create a storage account for the function (if it does not exist for the current subscription)" +echo +az storage account create -n $StorageName -l "WestUS" -g $ResourceGroup --sku Standard_LRS + +echo +echo "Create a function app (if it does not exist for the current subscription)" +echo +az functionapp createpreviewapp -n $FunctionAppName -g $ResourceGroup -l "WestUS" -s $StorageName --runtime python --is-linux + +echo +echo "Retrieving App Insights Id for $AppInsightsName" +echo +AppInsightsKey=$(az resource show -g $ResourceGroup -n $AppInsightsName --resource-type "Microsoft.Insights/components" --query properties.InstrumentationKey) + +#Remove double quotes +AppInsightsKey=$(sed -e 's/^"//' -e 's/"$//' <<<"$AppInsightsKey") +STORAGE_ACCOUNT_KEY=$(sed -e 's/^"//' -e 's/"$//' <<<"$STORAGE_ACCOUNT_KEY") +STORAGE_CONNECTION_STRING=$(sed -e 's/^"//' -e 's/"$//' <<<"$STORAGE_CONNECTION_STRING") + +echo +echo "Setting application setting on $FunctionAppName" +echo +az functionapp config appsettings set --name $FunctionAppName --resource-group $ResourceGroup \ + --settings "APPINSIGHTS_INSTRUMENTATIONKEY=$AppInsightsKey" \ + "DB_HOST=$DB_HOST" \ + "DB_USER=$DB_USER" \ + "DB_NAME=$DB_NAME" \ + "DB_PASS=$DB_PASS" \ + "STORAGE_ACCOUNT_NAME=$STORAGE_ACCOUNT_NAME" \ + "STORAGE_ACCOUNT_KEY=$STORAGE_ACCOUNT_KEY" \ + "SOURCE_CONTAINER_NAME=$SOURCE_CONTAINER_NAME" \ + "DESTINATION_CONTAINER_NAME=$DESTINATION_CONTAINER_NAME" \ + "STORAGE_CONNECTION_STRING=$STORAGE_CONNECTION_STRING" diff --git a/devops/deployment/Deploy-Storage.sh b/devops/deployment/Deploy-Storage.sh new file mode 100755 index 00000000..c323e5de --- /dev/null +++ b/devops/deployment/Deploy-Storage.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Need commands to fail early. +set -e +set -o pipefail + +if ! [ -x "$(command -v az)" ]; then + echo "Error Azure CLI not installed."; >&2 + echo "See: https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest to install Azure CLI"; >&2 + exit 1 +fi + +if [ -z "$RESOURCE_GROUP" ]; then + echo "Need to set resource group in the environment."; >&2 + exit 1 +fi + +if [ -z "$STORAGE_NAME" ]; then + echo "Need to set storage name in the environment."; >&2 + exit 1 +fi + +#A conditional to choose whether or not to redploy the storage account if it already exists +REDEPLOY_AZURE_STORAGE=${REDEPLOY_STORAGE:="true"} +if $REDEPLOY_AZURE_STORAGE; then + #First see if the storage account exists. + storage_query_result=$(az storage account list -g $RESOURCE_GROUP --query "[?name=='$PROJECT_STORAGE_ACCOUNT'].name") + if [[ $storage_query_result =~ $STORAGE_NAME ]]; + then + echo "Storage account $STORAGE_NAME already exists. Removing..." 
+ az storage account delete -g $RESOURCE_GROUP -n $STORAGE_NAME -y + fi +fi + +echo "Creating Storage Account" + +az storage account create --resource-group $RESOURCE_GROUP --name $STORAGE_NAME --sku Standard_LRS +STORAGE_KEY=$(az storage account keys list -n $STORAGE_NAME --resource-group $RESOURCE_GROUP --query [0].value) + +echo "Creating Temporary Storage Container" +az storage container create -n $STORAGE_TEMP_CONTAINER --account-key $STORAGE_KEY --account-name $STORAGE_NAME --public-access container + +echo "Creating Permanent Storage Container" +az storage container create -n $STORAGE_PERM_CONTAINER --account-key $STORAGE_KEY --account-name $STORAGE_NAME --public-access container + +echo "Creating an onboarding queue" +az storage queue create -n onboardqueue --account-key $STORAGE_KEY --account-name $STORAGE_NAME + +echo "Done!" \ No newline at end of file diff --git a/devops/deployment/ReadMe.md b/devops/deployment/ReadMe.md new file mode 100644 index 00000000..8ca04cab --- /dev/null +++ b/devops/deployment/ReadMe.md @@ -0,0 +1,55 @@ +# Getting Starting With Active Learning Infrastructure + +This directory contains several scripts to install the Active Learning infrastructure on to Azure. A more detailed look at database infrastructure deployment can be found [here](../../db/README.md) + +# Install Options +1. [Easy Install](#step1) +2. [Automated Install](#step2) + +## Easy Install + +The easiest way to get up and running is to manually update the _values_ of the config file [here](config/deployment_config.sh) + +``` +# Project configuration +RESOURCE_GROUP=my-resource-name +RESOURCE_LOCATION=westus +PROJECT_STORAGE_ACCOUNT=actlrnintstor +PROJECT_STORAGE_TEMP_CONTAINER=tempcont +PROJECT_STORAGE_PERM_CONTAINER=permcont + +# Database config +DATABASE_NAME=mydatabasename +DATABASE_SERVER_NAME=mypostgresservername +DATABASE_USERNAME=actlrnadmin +DATABASE_PASSWORD=MyPassword2019 + +# AppInsights config +APPINSIGHTS_NAME=myappinsightsname + +# Azure Function configuration +FUNCTION_STORAGE_ACCOUNT=actlrnintfuncstor +FUNCTION_APP_NAME=actlrnintegration +``` + +Next start a Python (3.5+) __virtual environment__ in the directory of deployment script (this directory). Next run the command below from the same directory: + +``` +. ./Deploy-Infrastructure config/deployment_config.sh +``` + +This command will deploy all the components necessary to accomplish tagging from scratch. + +## Automated Install + +In deployment environments that rely on dynamic environment variables we allow our top level script to be run without config file. SET the environment variables defined [here](config/deployment_config.sh) in your Bash session + +Start a Python (3.5+) __virtual environment__ in the directory of deployment script (this directory). Next run the command below from the same directory: + +``` +. 
./Deploy-Infrastructure +``` + +# Azure Pipelines Continuous Deployment Example + +TODO \ No newline at end of file diff --git a/devops/deployment/config/deployment_config.sh b/devops/deployment/config/deployment_config.sh new file mode 100755 index 00000000..4e153bfc --- /dev/null +++ b/devops/deployment/config/deployment_config.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Project configuration +RESOURCE_GROUP=jmsrg1 +RESOURCE_LOCATION=westus +PROJECT_STORAGE_ACCOUNT=jmsactlrnstor1 +PROJECT_STORAGE_TEMP_CONTAINER=tempcont +PROJECT_STORAGE_PERM_CONTAINER=permcont + +# Database config +DATABASE_NAME=jmsactlrndb1 +DATABASE_SERVER_NAME=jmspgsrvr1 +DATABASE_USERNAME=jmsdbadmin +DATABASE_PASSWORD=Th1siS4Pa55Wurd + +# AppInsights config +APPINSIGHTS_NAME=jmsinsightful1 + +# Azure Function configuration +FUNCTION_STORAGE_ACCOUNT=jmsfunstor1 +FUNCTION_APP_NAME=jmsfuncapp1 \ No newline at end of file diff --git a/devops/dsvm/README.md b/devops/dsvm/README.md new file mode 100644 index 00000000..5fc5e74c --- /dev/null +++ b/devops/dsvm/README.md @@ -0,0 +1,47 @@ +# Setting up an Azure DSVM for Active Learning + +This document will explain how to deploy an Azure DSVM and set up the environment for Active Learning. + +## Deployment + +Create an SSH Key on your local machine. The following will create a key in your ~/.ssh/act-learn-key location. + +```sh +$ ssh-keygen -f ~/.ssh/act-learn-key -t rsa -b 2048 +``` + +Secondly edit the environment variables in the [dsvm_config.sh](config/dsvm_config.sh) script with your own values. For instance: + +
+```sh
+RESOURCE_GROUP=MyAzureResourceGroup
+# VM config
+VM_SKU=Standard_NC6 #Make sure VM SKU is available in your resource group's region
+VM_IMAGE=microsoft-ads:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest
+VM_DNS_NAME=mytestdns
+VM_NAME=myvmname
+VM_ADMIN_USER=johndoe
+VM_SSH_KEY=~/.ssh/act-learn-key.pub
+```
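+
+The comment on `VM_SKU` above matters: not every GPU SKU is offered in every Azure region. An optional way to check before deploying (assuming the Azure CLI is installed and you are logged in) is to query the SKUs available in your target location, for example:
+
+```sh
+# Hypothetical check: confirm Standard_NC6 is offered in westus before running the deploy script
+az vm list-skus --location westus --query "[?name=='Standard_NC6']" --output table
+```
+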
+ +Lastly execute the deploy_dsvm.sh with your edited config file as a parameter. Note that the Azure CLI is required. Install [here](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) if needed. + +```sh +$ sh deploy_dsvm.sh config/dsvm_config.sh +``` + +## Environment Setup +We provide a module that will copy over a shell script to your DSVM and execute the shell script to setup an active learning environment. + +We require that your SSH key be added to the SSH agent. To add your SSH key to the SSH agent use the **_ssh-add_** command + +```sh +$ ssh-add -K ~/.ssh/act-learn-key +``` + +To copy and execute the shell script on the DSVM use the following command + +```sh +$ python setup-tensorflow.py --host admin@127.0.0.1 -k ~/.ssh/act-learn-key -s setup-tensorflow.sh +``` + +Note that in the host argument **_admin_**@127.0.0.1 section is the DSVM Admin name and admin@**_127.0.0.1_** is the IP address of the DSVM. diff --git a/devops/dsvm/config/dsvm_config.sh b/devops/dsvm/config/dsvm_config.sh new file mode 100644 index 00000000..3b266898 --- /dev/null +++ b/devops/dsvm/config/dsvm_config.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# System config +RESOURCE_GROUP=jmsrg1 + +# VM config +VM_SKU=Standard_NC6 +VM_IMAGE=microsoft-ads:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest +VM_DNS_NAME=jmsactlrnvm +VM_NAME=jmsactlrnvm +VM_ADMIN_USER=vmadmin +VM_SSH_KEY=~/.ssh/act-learn-key.pub \ No newline at end of file diff --git a/devops/dsvm/deploy_dsvm.sh b/devops/dsvm/deploy_dsvm.sh new file mode 100755 index 00000000..e43d8cd1 --- /dev/null +++ b/devops/dsvm/deploy_dsvm.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Within the configuration file, there is a need for an SSH key. To generate an SSH +# key on Linux, one uses the ssk-keygen command. The format is: +# +# ssh-keygen -f ~/.ssh/act-learn-key -t rsa -b 2048 + +# Check if any of the args are empty +if [ -z "$1" ]; then + echo "Usage: 'sh $0 (configuration file)'" + exit 1 +fi + +# Does the configuration exist? +if [ ! -e "$1" ]; then + echo "Unable to find configuration file -- $1" + exit 1 +fi + +# Read in the configuration +. $1 + +# Check and see if Azure CLI is present +az --version > /dev/null +if [ "$?" -ne "0" ]; then + echo "Unable to find azure CLI" + exit 1 +fi + +# Is the ssh key present? +if [ ! -e "$VM_SSH_KEY" ]; then + echo "SSH key file does not exist -- $VM_SSH_KEY" + exit 1 +fi + +# Does the resource group exist +RESOURCE_GROUP_PRESENT=`az group exists --name $RESOURCE_GROUP` +if [ "$RESROUCE_GROUP_PRESENT" == "false" ]; then + echo "Resource group does not exist -- $RESOURCE_GROUP" + exit 1 +fi + +az vm create \ + --resource-group $RESOURCE_GROUP \ + --name $VM_NAME \ + --admin-username $VM_ADMIN_USER \ + --public-ip-address-dns-name $VM_DNS_NAME \ + --image $VM_IMAGE \ + --size $VM_SKU \ + --ssh-key-value $VM_SSH_KEY +if [ "$?" 
-ne "0" ]; then + echo "Unable to provision DSVM" + exit 1 +fi diff --git a/devops/dsvm/setup-tensorflow.py b/devops/dsvm/setup-tensorflow.py new file mode 100644 index 00000000..abff23eb --- /dev/null +++ b/devops/dsvm/setup-tensorflow.py @@ -0,0 +1,47 @@ +import os +import argparse +from fabric import * + +def get_connection(dsvm_host,ssh_key_path): + #http://docs.fabfile.org/en/2.4/api/connection.html + return Connection(host=dsvm_host,connect_kwargs={"key_filename": ssh_key_path}) + +def main(host,ssh_key_path,script_path): + #ip_address = 'abrig@13.68.227.63' + #local_ssh_key_path = '/Users/andrebriggs/.ssh/act-learn-key' + #tf_setup_script = "setup-tensorflow.sh" + + no_errors = True + try: + with get_connection(host,ssh_key_path) as c: + result = c.run("rm -f ~/{0}".format(script_path)) + print("Ran {0.command!r} on {0.connection.host}, got stdout:\n{0.stdout}".format(result)) + print("Copying {0} to remote machine at {1}".format(script_path,host)) + c.put(script_path) + print("Executing {0} on remote machine".format(script_path)) + result = c.run("sh {0}".format(script_path)) + except Exception as e: + print(str(e)) + no_errors = False + finally: + return no_errors + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Sets up an active learning environment on a DSVM") + + parser.add_argument('--host', default=os.environ.get('DSVM_HOST', None), + help="A host name in the form of (UserName)@IpAddress (e.g. admin@127.0.0.1)") + + parser.add_argument('-k','--sshKeyPath', default=os.environ.get('DSVM_SSH_KEY_PATH', None), + help="Path to local private key for VM. (e.g. /Users/JohnDoe/.ssh/act-learn-key). Be sure run ssh-add -K ~/.ssh/act-learn-key first") + + parser.add_argument('-s','--scriptPath', default=os.environ.get('DSVM_SCRIPT', None), + help="Path to script that will be copied to DSVM and executed") + + args = parser.parse_args() + + if not args.host or not args.sshKeyPath or not args.scriptPath: + exit(parser.print_usage()) + + main(args.host,args.sshKeyPath,args.scriptPath) + diff --git a/devops/dsvm/setup-tensorflow.sh b/devops/dsvm/setup-tensorflow.sh new file mode 100644 index 00000000..e1425f6d --- /dev/null +++ b/devops/dsvm/setup-tensorflow.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +#This script automates the instructions from here: +#https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md +# + +#Fail on first error +set -e +#Suppress expanding variables before printing. 
+set +x +set +v + +#When executing on a DSVM over SSH some paths for pip, cp, make, etc may not be in the path, +export PATH=/anaconda/envs/py35/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin:/opt/caffe/build/install/bin/:/usr/local/cuda/bin:/dsvm/tools/cntk/cntk/bin:/usr/local/cuda/bin:/dsvm/tools/cntk/cntk/bin:/dsvm/tools/spark/current/bin:/opt/mssql-tools/bin:/bin + +echo -e '\n*******\tClone Tensorflow Models\t*******\n' +git clone https://github.com/tensorflow/models.git repos/models + +echo -e '\n*******\tInstall Tensorflow package\t*******\n' +cd repos/models/ && pip install tensorflow-gpu + +echo -e '\n*******\tInstall COCO API\t*******\n' +cd ~/ +git clone https://github.com/cocodataset/cocoapi.git repos/cocoapi +cd repos/cocoapi/PythonAPI/ +make +cp -r pycocotools ~/repos/models/research/ + +echo -e '\n*******\tSetup Protocal Buffer\t******\n' +cd ~/ +cd repos/models/research/ +wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip +unzip -o protobuf.zip +./bin/protoc object_detection/protos/*.proto --python_out=. + +echo -e '\n*******\tSetup Python Path\t******\n' +export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim + +echo -e '\n*******\tRunning Object Detection Tests\t******\n' +python object_detection/builders/model_builder_test.py + +echo -e '\n*******\tClone Active Learning\t*******\n' +git clone https://github.com/CatalystCode/active-learning-detect + +echo -e '\n*******\tInstalling Python Packages\t*******\n' +cd repos/models/research/active-learning-detect +pip install -r requirements.txt + +#Update the config.ini file at repos/models/research/active-learning-detect +echo -e 'Objection dectection install validation complete' \ No newline at end of file diff --git a/functions/README.md b/functions/README.md new file mode 100644 index 00000000..d0610ecb --- /dev/null +++ b/functions/README.md @@ -0,0 +1,211 @@ +## Deploying functions to a Azure Function App and Add Settings + +This tutorial talks about how to deploy a function to an Azure Function Application. For +the example, we will use the function(s) within the `pipeline` application located in this +directory. + +In order to set up the Azure Function Application, we will use [this](../tutorial/functions/docs/setup/initial/create_function_app.sh) script. It should be noted, that +this example also assumes that one has properly setup their environment for working with +Azure Functions, including installing the Azure Function Core Tools. That setup is discussed [here](../tutorial/functions/docs/setup/initial/README.md). + +### Creating the application + +Leveraging the `create_function_app.sh` script the application will be created for +us. The script requires four pieces of information: + +- Resource group name +- Resource group location +- Storage account name for the function application +- The name of the function application itself + +Note, the resource group as well as the storage account will be created by the script. 
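+
+The script itself is not reproduced in this document, but a minimal sketch of what it does with those four arguments might look like the following. This is an illustration only, modeled on the `az functionapp createpreviewapp` call used by `Deploy-Python-Functions-App.sh` elsewhere in this repository; the real `create_function_app.sh` remains the source of truth.
+
+```bash
+#!/bin/bash
+# Rough sketch only -- not the actual create_function_app.sh
+RESOURCE_GROUP=$1
+RESOURCE_GROUP_LOCATION=$2
+STORAGE_ACCOUNT_NAME=$3
+FUNCTION_APP_NAME=$4
+
+# The resource group and the storage account are created if they do not already exist
+az group create -n $RESOURCE_GROUP -l $RESOURCE_GROUP_LOCATION
+az storage account create -n $STORAGE_ACCOUNT_NAME -g $RESOURCE_GROUP -l $RESOURCE_GROUP_LOCATION --sku Standard_LRS
+
+# Create a Linux, consumption-plan function app for the Python runtime
+# (createpreviewapp is provided by the Azure Functions Linux Consumption preview CLI extension)
+az functionapp createpreviewapp -n $FUNCTION_APP_NAME -g $RESOURCE_GROUP -l $RESOURCE_GROUP_LOCATION -s $STORAGE_ACCOUNT_NAME --runtime python --is-linux
+```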
+ +Running the script: + +```bash +export RESOURCE_GROUP=jmsactlrnrg +export RESOURCE_GROUP_LOCATION=westus +export STORAGE_ACCOUNT_NAME=jmslrnpipe +export FUNCTION_APP_NAME=jmsactlrnpipeline + +jims@functional:~/code/src/github/jmspring/active-learning-detect$ ./tutorial/functions/docs/setup/initial/create_function_app.sh $RESOURCE_GROUP $RESOURCE_GROUP_LOCATION $STORAGE_ACCOUNT_NAME $FUNCTION_APP_NAME +{ + "id": "/subscriptions/3fee811e-11bf-abcd-9c62-adbeef517724/resourceGroups/jmsactlrnrg", + "location": "westus", + "managedBy": null, + "name": "jmsactlrnrg", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null +} +{ + "accessTier": null, + "creationTime": "2018-10-28T03:36:16.816514+00:00", + "customDomain": null, + "enableHttpsTrafficOnly": false, + "encryption": { + "keySource": "Microsoft.Storage", + "keyVaultProperties": null, + "services": { + "blob": { + "enabled": true, + "lastEnabledTime": "2018-10-28T03:36:16.894642+00:00" + }, + "file": { + "enabled": true, + "lastEnabledTime": "2018-10-28T03:36:16.894642+00:00" + }, + "queue": null, + "table": null + } + }, + "id": "/subscriptions/3fee811e-11bf-abcd-9c62-adbeef517724/resourceGroups/jmsactlrnrg/providers/Microsoft.Storage/storageAccounts/jmslrnpipe", + "identity": null, + "isHnsEnabled": null, + "kind": "Storage", + "lastGeoFailoverTime": null, + "location": "westus", + "name": "jmslrnpipe", + "networkRuleSet": { + "bypass": "AzureServices", + "defaultAction": "Allow", + "ipRules": [], + "virtualNetworkRules": [] + }, + "primaryEndpoints": { + "blob": "https://jmslrnpipe.blob.core.windows.net/", + "dfs": null, + "file": "https://jmslrnpipe.file.core.windows.net/", + "queue": "https://jmslrnpipe.queue.core.windows.net/", + "table": "https://jmslrnpipe.table.core.windows.net/", + "web": null + }, + "primaryLocation": "westus", + "provisioningState": "Succeeded", + "resourceGroup": "jmsactlrnrg", + "secondaryEndpoints": null, + "secondaryLocation": null, + "sku": { + "capabilities": null, + "kind": null, + "locations": null, + "name": "Standard_LRS", + "resourceType": null, + "restrictions": null, + "tier": "Standard" + }, + "statusOfPrimary": "available", + "statusOfSecondary": null, + "tags": {}, + "type": "Microsoft.Storage/storageAccounts" +} +Your Linux, cosumption plan, function app 'jmsactlrnpipeline' has been successfully created but is not active until content is published usingAzure Portal or the Functions Core Tools. +``` + +At this point, you have an Azure Function Application to which you can publish your functions to. + +### Configuring the Application Environment + +In a number of cases, one will need to set environment variables for their azure function to use. The following +shows setting up the required variables for accessing a Postgres database that the data layer of the `pipeline` +application uses. + +```bash +export DB_HOST="" +export DB_USER="" + }, + { + "name": "AzureWebJobsDashboard", + "slotSetting": false, + "value": "" + }, + { + "name": "WEBSITE_CONTENTAZUREFILECONNECTIONSTRING", + "slotSetting": false, + "value": "" + }, + { + "name": "WEBSITE_CONTENTSHARE", + "slotSetting": false, + "value": "jmsactlrnpipeline" + }, + { + "name": "DB_HOST", + "slotSetting": false, + "value": " + }, + { + "name": "DB_USER", + "slotSetting": false, + "value": "" + }, + { + "name": "DB_PASS", + "slotSetting": false, + "value": "" + } +] +``` + +### Deploying a function to the application + +Once you have your configuration, it is time to deploy the application itself. 
You use the +Azure Function Core Utils tools to publish your functions into the Azure Function Application +created and configured above. That looks like: + +```bash +jims$ func azure functionapp publish $FUNCTION_APP_NAME --force +Getting site publishing info... +pip download -r /home/jims/code/src/github/jmspring/active-learning-detect/functions/pipeline/requirements.txt --dest /tmp/azureworkertczxe16l +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 azure_functions==1.0.0a5 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 azure_functions_worker==1.0.0a6 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 pg8000==1.12.3 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 setuptools==40.5.0 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 grpcio_tools==1.14.2 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 six==1.11.0 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 grpcio==1.14.2 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworker40w5hod2 protobuf==3.6.1 + +Preparing archive... +Uploading content... +Upload completed successfully. +Deployment completed successfully. +Removing 'WEBSITE_CONTENTSHARE' from 'jmsactlrnpipeline' +Removing 'WEBSITE_CONTENTAZUREFILECONNECTIONSTRING' from 'jmsactlrnpipeline' +Syncing triggers... 
+Functions in jmsactlrnpipeline: + download - [httpTrigger] + Invoke url: https://jmsactlrnpipeline.azurewebsites.net/api/download?code=AARPr45D5K6AIEWv8bEaqWalSaddrUzd4aydOxmhSPauGUrsPvzw== +``` + +Showing our function running: + +```bash +curl "https://jmsactlrnpipeline.azurewebsites.net/api/download?code=AARPr45D5K6AIEWv8bEaqWalSaddrUzd4aydOxmhSPauGUrsPvzw==&imageCount=1" +["https://csehackstorage.blob.core.windows.net/image-to-tag/1.jpg"] +``` \ No newline at end of file diff --git a/functions/__init__.py b/functions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/functions/pipeline/__init__.py b/functions/pipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/functions/pipeline/classification/__init__.py b/functions/pipeline/classification/__init__.py new file mode 100644 index 00000000..288c47f1 --- /dev/null +++ b/functions/pipeline/classification/__init__.py @@ -0,0 +1,45 @@ +import logging +import azure.functions as func +import json +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageTagState, PredictionLabel + + +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + # setup response object + headers = { + "content-type": "application/json" + } + user_name = req.params.get('userName') + classes_list = req.params.get("className") + if not classes_list: + return func.HttpResponse( + status_code=401, + headers=headers, + body=json.dumps({"error": "invalid classes list given or omitted"}) + ) + elif not user_name: + return func.HttpResponse( + status_code=401, + headers=headers, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) + try: + # DB configuration + data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) + + class_mapping = data_access.get_classification_map(set(classes_list.split(',')), user_id) + logging.debug("Got classes mapping: " + str(class_mapping)) + + return func.HttpResponse( + status_code=200, + headers=headers, + body=json.dumps(class_mapping) + ) + except Exception as e: + return func.HttpResponse( + "exception:" + str(e), + status_code=500 + ) \ No newline at end of file diff --git a/functions/pipeline/classification/function.json b/functions/pipeline/classification/function.json new file mode 100644 index 00000000..f6425972 --- /dev/null +++ b/functions/pipeline/classification/function.json @@ -0,0 +1,20 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "get" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + \ No newline at end of file diff --git a/functions/pipeline/classification/host.json b/functions/pipeline/classification/host.json new file mode 100644 index 00000000..1c8bb37a --- /dev/null +++ b/functions/pipeline/classification/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" + } \ No newline at end of file diff --git a/functions/pipeline/extensions.csproj b/functions/pipeline/extensions.csproj new file mode 100644 index 00000000..c4e99e98 --- /dev/null +++ b/functions/pipeline/extensions.csproj @@ -0,0 +1,11 @@ + + + netstandard2.0 + + ** + + + + + + \ No newline at end of file diff --git a/functions/pipeline/host.json b/functions/pipeline/host.json new file mode 100644 index 00000000..83a92167 --- /dev/null +++ b/functions/pipeline/host.json @@ -0,0 +1,3 @@ +{ 
+ "version": "2.0" +} \ No newline at end of file diff --git a/functions/pipeline/images/__init__.py b/functions/pipeline/images/__init__.py new file mode 100644 index 00000000..95be31b0 --- /dev/null +++ b/functions/pipeline/images/__init__.py @@ -0,0 +1,93 @@ +import logging + +import azure.functions as func +import json +import jsonpickle +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageTagState +from ..shared.storage_utils import get_signed_url_for_permstore_blob + + +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + + image_count = req.params.get('imageCount') + user_name = req.params.get('userName') + tag_status = req.params.get('tagStatus') + image_ids = req.params.get('imageId') + checkout = req.params.get('checkOut') + + # setup response object + headers = { + "content-type": "application/json" + } + if not user_name: + return func.HttpResponse( + status_code=401, + headers=headers, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) + elif not tag_status and not image_ids and not checkout: + return func.HttpResponse( + status_code=400, + headers=headers, + body=json.dumps({"error": "either of tag status or images ids needs to be specified if not checking out images for download"}) + ) + elif checkout and checkout.lower() == "true" and not image_count: + return func.HttpResponse( + status_code=400, + headers=headers, + body=json.dumps({"error": "image count needs to be specified when checking out images"}) + ) + else: + try: + # DB configuration + data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) + + # This offers download api functionality to check out n images. + # We ignore the rest of query params when checkOut is set to true. + if checkout and checkout.lower() == "true": + image_count = int(image_count) + checked_out_images = data_access.checkout_images(image_count, user_id) + existing_classifications_list = data_access.get_existing_classifications() + # update image locations to signed urls + for image in checked_out_images: + signed_url_location = get_signed_url_for_permstore_blob(image.imagelocation) + image.imagelocation = signed_url_location + return_body_json = { + "images": jsonpickle.encode(checked_out_images, unpicklable=False), + "classification_list": existing_classifications_list + } + return func.HttpResponse( + status_code=200, + headers=headers, + body=json.dumps(return_body_json) + ) + + # Get images info + if image_ids: + image_infos = data_access.get_image_info_for_image_ids(image_ids.split(',')) + elif tag_status: + if image_count: + image_count = int(image_count) + images_by_tag_status = data_access.get_images_by_tag_status(tag_status.split(','), image_count) + logging.debug("Received {0} images in tag status {1}".format(len(images_by_tag_status),tag_status)) + image_infos = data_access.get_image_info_for_image_ids(list(images_by_tag_status.keys())) + + # For each image_info in image_infos, update image_info.location a signed url. 
+ for image_info in image_infos: + signed_url_location = get_signed_url_for_permstore_blob(image_info['location']) + image_info['location'] = signed_url_location + + content = json.dumps(image_infos) + return func.HttpResponse( + status_code=200, + headers=headers, + body=content + ) + except Exception as e: + return func.HttpResponse( + "exception:" + str(e), + status_code=500 + ) diff --git a/functions/pipeline/images/function.json b/functions/pipeline/images/function.json new file mode 100644 index 00000000..f6425972 --- /dev/null +++ b/functions/pipeline/images/function.json @@ -0,0 +1,20 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "get" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + \ No newline at end of file diff --git a/functions/pipeline/images/host.json b/functions/pipeline/images/host.json new file mode 100644 index 00000000..b9f92c0d --- /dev/null +++ b/functions/pipeline/images/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" +} \ No newline at end of file diff --git a/functions/pipeline/labels/__init__.py b/functions/pipeline/labels/__init__.py new file mode 100644 index 00000000..ae060af8 --- /dev/null +++ b/functions/pipeline/labels/__init__.py @@ -0,0 +1,121 @@ +import logging +import jsonpickle +from collections import namedtuple +import azure.functions as func +import json +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageTagState, PredictionLabel, ImageTag + +DEFAULT_RETURN_HEADER= { "content-type": "application/json" } + +# GET returns all human annotated labels +# POST calls with upload=true flag save all human annotated labels +# POST calls with trainingId param save predicted labels +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + + user_name = req.params.get('userName') + training_id = req.params.get("trainingId") + upload = req.params.get("upload") + + if not user_name: + return func.HttpResponse( + status_code=401, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) + elif req.method == "POST" and not upload and not training_id: + return func.HttpResponse( + status_code=401, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"error": "trainingId or upload flag needs to be specified with a POST request"}) + ) + else: + try: + # DB configuration + data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) + + logging.debug("User '{0}' invoked labels api".format(user_name)) + + if req.method == "GET": + # Note: Currently we return all human annotated labels since TAGGING.CSV requires all rows + # No use case to return predicted labels at the moment. + labels = data_access.get_labels() + + #Encode the complex object nesting + content = jsonpickle.encode(labels,unpicklable=False) + return func.HttpResponse( + status_code=200, + headers=DEFAULT_RETURN_HEADER, + body=content + ) + elif req.method == "POST" and upload and upload.lower() == "true": + try: + upload_data = req.get_json() + except ValueError as ve: + logging.error("Error: Unable to decode POST body. 
Error: " + repr(ve)) + return func.HttpResponse( + status_code=401, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "Unable to decode POST body."}) + ) + __upload_tag_data(upload_data, data_access, user_id) + + return func.HttpResponse( + body=json.dumps(upload_data), + status_code=201, + headers=DEFAULT_RETURN_HEADER, + ) + elif req.method == "POST" and training_id: + payload = json.loads(req.get_body()) + if not training_id: + return func.HttpResponse( + status_code=401, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"error": "invalid training_id given or omitted"}) + ) + training_id = int(training_id) + payload_json = [namedtuple('PredictionLabel', item.keys())(*item.values()) for item in payload] + data_access.add_prediction_labels(payload_json, training_id) + return func.HttpResponse( + status_code=201, + headers=DEFAULT_RETURN_HEADER + ) + + except Exception as e: + return func.HttpResponse( + "exception:" + str(e), + status_code=500 + ) + +def __upload_tag_data(upload_data, data_access, user_id): + ids_to_tags = upload_data["imageIdToTags"] + + all_imagetags = [] + for image_id in ids_to_tags.keys(): + if ids_to_tags[image_id]: + all_imagetags.extend(__create_ImageTag_list(image_id, ids_to_tags[image_id])) + + unique_class_names = upload_data["uniqueClassNames"] + if all_imagetags and unique_class_names: + logging.info("Update all visited images with tags and set state to completed") + class_map = data_access.get_classification_map(unique_class_names,user_id) + annotated_labels = data_access.convert_to_annotated_label(all_imagetags,class_map) + data_access.update_tagged_images_v2(annotated_labels,user_id) + else: + logging.info("No tagged image ids or classifications received") + + logging.info("Update visited but no tags identified images") + data_access.update_completed_untagged_images(upload_data["imagesVisitedNoTag"], user_id) + + logging.info("Update unvisited/incomplete images") + data_access.update_incomplete_images(upload_data["imagesNotVisited"], user_id) + +# Create list of ImageTag objects to write to db for given image_id +def __create_ImageTag_list(image_id, tags_list): + image_tags = [] + for tag in tags_list: + image_tags.append(ImageTag(image_id, tag['x1'], tag['x2'], tag['y1'], tag['y2'], tag['classes'])) + + return image_tags \ No newline at end of file diff --git a/functions/pipeline/labels/function.json b/functions/pipeline/labels/function.json new file mode 100644 index 00000000..a33d0f5d --- /dev/null +++ b/functions/pipeline/labels/function.json @@ -0,0 +1,21 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "get", + "post" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + \ No newline at end of file diff --git a/functions/pipeline/labels/host.json b/functions/pipeline/labels/host.json new file mode 100644 index 00000000..1c8bb37a --- /dev/null +++ b/functions/pipeline/labels/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" + } \ No newline at end of file diff --git a/functions/pipeline/local.settings.json b/functions/pipeline/local.settings.json new file mode 100644 index 00000000..0c027522 --- /dev/null +++ b/functions/pipeline/local.settings.json @@ -0,0 +1,16 @@ +{ + "IsEncrypted": false, + "Values": { + "FUNCTIONS_WORKER_RUNTIME": "python", + "AzureWebJobsStorage": "{AzureWebJobsStorage}", + "STORAGE_CONNECTION_STRING": "", + "STORAGE_ACCOUNT_NAME": "", + "STORAGE_ACCOUNT_KEY": "", + 
"SOURCE_CONTAINER_NAME": "", + "DESTINATION_CONTAINER_NAME": "", + "DB_HOST": "", + "DB_NAME": "", + "DB_PASS": "", + "DB_USER": "" + } +} diff --git a/functions/pipeline/onboardcontainer/__init__.py b/functions/pipeline/onboardcontainer/__init__.py new file mode 100644 index 00000000..273e38c9 --- /dev/null +++ b/functions/pipeline/onboardcontainer/__init__.py @@ -0,0 +1,111 @@ +import os +import logging +import json +import azure.functions as func +from urlpath import URL +from datetime import datetime, timedelta +from ..shared.constants import ImageFileType +from ..shared.storage_utils import get_filepath_from_url + +from azure.storage.blob import BlockBlobService, BlobPermissions +from azure.storage.queue import QueueService, QueueMessageFormat + +DEFAULT_RETURN_HEADER = { + "content-type": "application/json" +} + + +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + + user_name = req.params.get('userName') + + if not user_name: + return func.HttpResponse( + status_code=401, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) + + try: + req_body = req.get_json() + logging.debug(req.get_json()) + storage_account = req_body["storageAccount"] + storage_account_key = req_body["storageAccountKey"] + storage_container = req_body["storageContainer"] + except ValueError: + return func.HttpResponse( + "ERROR: Unable to decode POST body", + status_code=400 + ) + + if not storage_container or not storage_account or not storage_account_key: + return func.HttpResponse( + "ERROR: storage container/account/key/queue not specified.", + status_code=401 + ) + + # Create blob service for storage account (retrieval source) + blob_service = BlockBlobService( + account_name=storage_account, + account_key=storage_account_key) + + # Queue service for perm storage and queue + queue_service = QueueService( + account_name=os.getenv('STORAGE_ACCOUNT_NAME'), + account_key=os.getenv('STORAGE_ACCOUNT_KEY') + ) + + queue_service.encode_function = QueueMessageFormat.text_base64encode + + try: + blob_list = [] + + for blob_object in blob_service.list_blobs(storage_container): + blob_url = URL( + blob_service.make_blob_url( + storage_container, + blob_object.name + ) + ) + # Check for supported image types here. + if ImageFileType.is_supported_filetype(blob_url.suffix): + logging.debug("INFO: Building sas token for blob " + blob_object.name) + # create sas signature + sas_signature = blob_service.generate_blob_shared_access_signature( + storage_container, + blob_object.name, + BlobPermissions.READ, + datetime.utcnow() + timedelta(hours=1) + ) + + logging.debug("INFO: have sas signature {}".format(sas_signature)) + + signed_url = blob_url.with_query(sas_signature) + + blob_list.append(signed_url.as_uri()) + + logging.debug("INFO: Built signed url: {}".format(signed_url)) + + msg_body = { + "imageUrl": signed_url.as_uri(), + "fileName": str(blob_url.name), + "fileExtension": str(blob_url.suffix), + "directoryComponents": get_filepath_from_url(blob_url, storage_container), + "userName": user_name + } + + body_str = json.dumps(msg_body) + queue_service.put_message("onboardqueue", body_str) + else: + logging.info("Blob object not supported. Object URL={}".format(blob_url.as_uri)) + + return func.HttpResponse( + status_code=202, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps(blob_list) + ) + except Exception as e: + logging.error("ERROR: Could not build blob object list. 
Exception: " + str(e)) + return func.HttpResponse("ERROR: Could not get list of blobs in storage_container={0}. Exception={1}".format( + storage_container, e), status_code=500) \ No newline at end of file diff --git a/functions/pipeline/onboardcontainer/function.json b/functions/pipeline/onboardcontainer/function.json new file mode 100644 index 00000000..852fa7b0 --- /dev/null +++ b/functions/pipeline/onboardcontainer/function.json @@ -0,0 +1,19 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "post" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} \ No newline at end of file diff --git a/functions/pipeline/onboardcontainer/host.json b/functions/pipeline/onboardcontainer/host.json new file mode 100644 index 00000000..81e35b7b --- /dev/null +++ b/functions/pipeline/onboardcontainer/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" +} \ No newline at end of file diff --git a/functions/pipeline/onboarding/__init__.py b/functions/pipeline/onboarding/__init__.py new file mode 100644 index 00000000..15f52849 --- /dev/null +++ b/functions/pipeline/onboarding/__init__.py @@ -0,0 +1,142 @@ +import os +import logging +import json +import azure.functions as func +from urllib.request import urlopen +from PIL import Image +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageInfo +from ..shared.onboarding import copy_images_to_permanent_storage, delete_images_from_temp_storage +from azure.storage.blob import BlockBlobService + +DEFAULT_RETURN_HEADER= { "content-type": "application/json" } + +COPY_SOURCE = os.getenv('SOURCE_CONTAINER_NAME') +COPY_DESTINATION = os.getenv('DESTINATION_CONTAINER_NAME') +ACCOUNT_NAME=os.getenv('STORAGE_ACCOUNT_NAME') +ACCOUNT_KEY=os.getenv('STORAGE_ACCOUNT_KEY') + +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + + user_name = req.params.get('userName') + + if not user_name: + return func.HttpResponse( + status_code=400, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "userName query parameter invalid or omitted."}) + ) + + try: + bodyJson = req.get_json() + logging.info("Request json: {}".format(bodyJson)) + if "imageUrls" not in bodyJson: + raise ValueError("invalid request body") + raw_url_list = bodyJson["imageUrls"] + except ValueError as ve: + logging.error("Error: Unable to decode POST body. Error: " + repr(ve)) + return func.HttpResponse( + status_code=400, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "Unable to decode POST body."}) + ) + + if not raw_url_list: + logging.error("Error: URL list empty.") + return func.HttpResponse( + status_code=400, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "URL list empty."}) + ) + + # Check to ensure image URLs sent by client are all unique. + url_list = set(raw_url_list) + + try: + image_object_list = build_objects_from_url_list(url_list) + except Exception as e: + logging.error("Error: Could not build image object list. Exception: " + str(e)) + return func.HttpResponse( + status_code=400, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "Could not build image object list. Exception: " + str(e)}) + ) + + try: + data_access = ImageTagDataAccess(get_postgres_provider()) + except Exception as e: + logging.error("Error: Database connection failed. 
Exception: " + str(e)) + return func.HttpResponse( + status_code=500, + headers=DEFAULT_RETURN_HEADER, + body=json.dumps({"Error": "Database connection failed. Exception: " + str(e)}) + ) + + # Create/look up username in database and retrieve user_id number + user_id= data_access.create_user(user_name) + logging.info("User ID for {0} is {1}".format(user_name, user_id)) + + # Add the images to the database and retrieve their image ID's + logging.info("Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's") + image_id_url_map = data_access.add_new_images(image_object_list,user_id) + + # Create blob service for storage account + blob_service = BlockBlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY) + + # Copy images from temporary to permanent storage. Receive back a list of the copy operations that succeeded and failed. + # Note: Format for copy_succeeded_dict and copy_error_dict is { sourceURL : destinationURL } + copy_succeeded_dict, copy_error_dict = copy_images_to_permanent_storage(image_id_url_map, COPY_SOURCE, COPY_DESTINATION, blob_service) + + # Update URLs in DB for images that were successfully copied + logging.info("Now updating URLs in the DB for images that were successfully copied...") + # Build new image_id_url_map containing images that were successfully copied + update_urls_dictionary = {} + for key in copy_succeeded_dict.keys(): + destination_url = copy_succeeded_dict[key] + filename = str(destination_url).split('/')[-1] + image_id_to_update = int(filename.split('.')[0]) + update_urls_dictionary[image_id_to_update] = str(destination_url) + data_access.update_image_urls(update_urls_dictionary, user_id) + logging.info("Done.") + + # Delete images from temporary storage. Receive back a list of the delete operations that succeeded and failed. + # Note: Format for delete_succeeded_dict and delete_error_dict is { sourceURL : destinationURL } + logging.info("Now deleting images from temp storage...") + delete_succeeded_dict, delete_error_dict = delete_images_from_temp_storage(copy_succeeded_dict, COPY_SOURCE, blob_service) + logging.info("Done.") + + # If both error_dicts are empty, return a 200 OK status code. + # If copy_error_dict or delete_error_dict contains any items, build a JSON object for HTTP response + # and return a bad status code indicating that one or more images failed. + if not copy_error_dict and not delete_error_dict: + content = json.dumps({"Success": "Transfer of all images complete."}) + return func.HttpResponse( + status_code=200, + headers=DEFAULT_RETURN_HEADER, + body=content + ) + else: + content = json.dumps({ + "copy_failed":dict(copy_error_dict), + "delete_failed":dict(delete_error_dict) + }) + return func.HttpResponse( + status_code=500, + headers=DEFAULT_RETURN_HEADER, + body=content + ) + +# Given a list of image URL's, build an ImageInfo object for each, and return a list of these image objects. 
+def build_objects_from_url_list(url_list): + image_object_list = [] + for url in url_list: + # Split original image name from URL + original_filename = url.split("/")[-1] + # Create ImageInfo object (def in db_access.py) + with Image.open(urlopen(url)) as img: + width, height = img.size + image = ImageInfo(original_filename, url, height, width) + # Append image object to the list + image_object_list.append(image) + return image_object_list diff --git a/functions/pipeline/onboarding/function.json b/functions/pipeline/onboarding/function.json new file mode 100644 index 00000000..852fa7b0 --- /dev/null +++ b/functions/pipeline/onboarding/function.json @@ -0,0 +1,19 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "post" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} \ No newline at end of file diff --git a/functions/pipeline/onboarding/host.json b/functions/pipeline/onboarding/host.json new file mode 100644 index 00000000..81e35b7b --- /dev/null +++ b/functions/pipeline/onboarding/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" +} \ No newline at end of file diff --git a/functions/pipeline/onboarding/urlList.json b/functions/pipeline/onboarding/urlList.json new file mode 100644 index 00000000..b1a20e7c --- /dev/null +++ b/functions/pipeline/onboarding/urlList.json @@ -0,0 +1,3 @@ +{ "imageUrls": ["http://www.whitneyway.com/Images/15/2017%20Puppies%20in%20Easter%20basket%204-16-17_800.JPG", + "http://allpetcages.com/wp-content/uploads/2017/06/puppy-whelping-box.jpg", + "http://78.media.tumblr.com/eea2f882ec08255e40cecaf8ca1d4543/tumblr_nmxjbjIK141qi4ucgo1_500.jpg"] } \ No newline at end of file diff --git a/functions/pipeline/onboardqueueproccessor/__init__.py b/functions/pipeline/onboardqueueproccessor/__init__.py new file mode 100644 index 00000000..cd9db6e9 --- /dev/null +++ b/functions/pipeline/onboardqueueproccessor/__init__.py @@ -0,0 +1,97 @@ +import os +import json +import logging +import azure.functions as func + +from urllib.request import urlopen + +from PIL import Image +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageInfo +from azure.storage.blob import BlockBlobService + + +def main(msg: func.QueueMessage) -> None: + logging.info('Python queue trigger function processed a queue item: %s', + msg.get_body().decode('utf-8')) + + queue_msg = json.dumps({ + 'id': msg.id, + 'body': msg.get_body().decode('utf-8'), + 'expiration_time': (msg.expiration_time.isoformat() + if msg.expiration_time else None), + 'insertion_time': (msg.insertion_time.isoformat() + if msg.insertion_time else None), + 'time_next_visible': (msg.time_next_visible.isoformat() + if msg.time_next_visible else None), + 'pop_receipt': msg.pop_receipt, + 'dequeue_count': msg.dequeue_count + }) + + logging.debug(queue_msg) + + try: + msg_json = json.loads(msg.get_body().decode('utf-8')) + + img_url = msg_json['imageUrl'] + user_name = msg_json["userName"] + original_filename = msg_json['fileName'] + filetype = msg_json['fileExtension'] + original_file_directory = msg_json['directoryComponents'] + + # Only 1 object in this list for now due to single message processing. 
+ image_object_list = [] + + with Image.open(urlopen(img_url)) as img: + width, height = img.size + + image = ImageInfo(original_filename, img_url, height, width) + # Append image object to the list + image_object_list.append(image) + + data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) + + logging.debug("Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's") + image_id_url_map = data_access.add_new_images(image_object_list, user_id) + + copy_destination = os.getenv('DESTINATION_CONTAINER_NAME') + + # Create blob service for storage account + blob_service = BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), + account_key=os.getenv('STORAGE_ACCOUNT_KEY')) + + # Copy images to permanent storage and get a dictionary of images for which to update URLs in DB. + # and a list of failures. If the list of failures contains any items, return a status code other than 200. + + image_id = list(image_id_url_map.values())[0] + new_blob_name = (str(image_id) + filetype) + + response = urlopen(img_url) + + image_bytes = response.read() + + # Per Azure notes https://docs.microsoft.com/en-us/azure/storage/blobs/storage-properties-metadata: + # The name of your metadata must conform to the naming conventions for C# identifiers. Dashes do not work. + # Azure blob is also setting the keys to full lowercase. + blob_metadata = { + "userFilePath": original_file_directory, + "originalFilename": original_filename, + "uploadUser": user_name + } + + blob_create_response = blob_service.create_blob_from_bytes(copy_destination, new_blob_name, image_bytes, metadata=blob_metadata) + update_urls_dictionary = {image_id: blob_service.make_blob_url(copy_destination, new_blob_name)} + + # Otherwise, dictionary contains permanent image URLs for each image ID that was successfully copied. + if not blob_create_response: + logging.error("ERROR: Image copy/delete operation failed. 
Check state of images in storage.") + else: + logging.debug("Now updating permanent URLs in the DB...") + data_access.update_image_urls(update_urls_dictionary, user_id) + + # content = json.dumps({"imageUrls": list(update_urls_dictionary.values())}) + logging.debug("success onboarding.") + except Exception as e: + logging.error("Exception: " + str(e)) + raise e # TODO: Handle errors and exceptions on the poison queue diff --git a/functions/pipeline/onboardqueueproccessor/function.json b/functions/pipeline/onboardqueueproccessor/function.json new file mode 100644 index 00000000..57c6527c --- /dev/null +++ b/functions/pipeline/onboardqueueproccessor/function.json @@ -0,0 +1,13 @@ +{ + "scriptFile": "__init__.py", + "disabled": false, + "bindings": [ + { + "type": "queueTrigger", + "direction": "in", + "name": "msg", + "queueName": "onboardqueue", + "connection": "STORAGE_CONNECTION_STRING", + } + ] +} \ No newline at end of file diff --git a/functions/pipeline/onboardqueueproccessor/host.json b/functions/pipeline/onboardqueueproccessor/host.json new file mode 100644 index 00000000..81e35b7b --- /dev/null +++ b/functions/pipeline/onboardqueueproccessor/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" +} \ No newline at end of file diff --git a/functions/pipeline/requirements.txt b/functions/pipeline/requirements.txt new file mode 100644 index 00000000..d521c99a --- /dev/null +++ b/functions/pipeline/requirements.txt @@ -0,0 +1,13 @@ +azure-functions==1.0.0a5 +azure-functions-worker==1.0.0a6 +azure-storage-blob==1.4.0 +azure-storage-file==1.4.0 +azure-storage-queue==1.4.0 +grpcio==1.14.2 +grpcio-tools==1.14.2 +protobuf==3.6.1 +six==1.11.0 +pg8000==1.12.3 +Pillow==5.3.0 +urlpath==1.1.4 +jsonpickle diff --git a/functions/pipeline/resettagstate/__init__.py b/functions/pipeline/resettagstate/__init__.py new file mode 100644 index 00000000..54083799 --- /dev/null +++ b/functions/pipeline/resettagstate/__init__.py @@ -0,0 +1,21 @@ +import datetime +import logging + +import azure.functions as func +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageInfo + + +def main(mytimer: func.TimerRequest) -> None: + utc_timestamp = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc).isoformat() + + try: + data_access = ImageTagDataAccess(get_postgres_provider()) + stale_image_ids = data_access.reset_stale_checkedout_images() + logging.info('Reset {} images to TAGGING_INCOMPLETE state'.format(len(stale_image_ids))) + except Exception as e: + logging.error('Error encounted while trying to reset stale image states. {}'.format(e)) + raise + + logging.info('Successfully ran reset function at %s', utc_timestamp) diff --git a/functions/pipeline/resettagstate/function.json b/functions/pipeline/resettagstate/function.json new file mode 100644 index 00000000..e6ee0e77 --- /dev/null +++ b/functions/pipeline/resettagstate/function.json @@ -0,0 +1,11 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "name": "mytimer", + "type": "timerTrigger", + "direction": "in", + "schedule": "0 0 0 * * *" + } + ] +} \ No newline at end of file diff --git a/functions/pipeline/resettagstate/readme.md b/functions/pipeline/resettagstate/readme.md new file mode 100644 index 00000000..e8b7e887 --- /dev/null +++ b/functions/pipeline/resettagstate/readme.md @@ -0,0 +1,11 @@ +# TimerTrigger - Python + +The `TimerTrigger` makes it incredibly easy to have your functions executed on a schedule. 
This sample demonstrates a simple use case of calling your function every 5 minutes. + +## How it works + +For a `TimerTrigger` to work, you provide a schedule in the form of a [cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression)(See the link for full details). A cron expression is a string with 6 separate expressions which represent a given schedule via patterns. The pattern we use to represent every 5 minutes is `0 */5 * * * *`. This, in plain text, means: "When seconds is equal to 0, minutes is divisible by 5, for any hour, day of the month, month, day of the week, or year". + +## Learn more + + Documentation diff --git a/functions/pipeline/shared/__init__.py b/functions/pipeline/shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/functions/pipeline/shared/constants/__init__.py b/functions/pipeline/shared/constants/__init__.py new file mode 100644 index 00000000..8d52ee33 --- /dev/null +++ b/functions/pipeline/shared/constants/__init__.py @@ -0,0 +1,13 @@ +from enum import Enum, unique + + +@unique +class ImageFileType(Enum): + GIF = ".gif" + PNG = ".png" + JPG = ".jpg" + JPEG = ".jpeg" + + @classmethod + def is_supported_filetype(cls, value): + return any(value.lower() == item.value.lower() for item in cls) diff --git a/functions/pipeline/shared/db_access/__init__.py b/functions/pipeline/shared/db_access/__init__.py new file mode 100644 index 00000000..ff344272 --- /dev/null +++ b/functions/pipeline/shared/db_access/__init__.py @@ -0,0 +1,2 @@ +from .models import ImageTag, ImageInfo, ImageTagState, ImageLabel, PredictionLabel, TrainingSession, Tag +from .db_access_v2 import ImageTagDataAccess \ No newline at end of file diff --git a/functions/pipeline/shared/db_access/db_access_v2.py b/functions/pipeline/shared/db_access/db_access_v2.py new file mode 100644 index 00000000..89f985d0 --- /dev/null +++ b/functions/pipeline/shared/db_access/db_access_v2.py @@ -0,0 +1,624 @@ +import string +import logging +import random +import getpass +import itertools +import json +from ..db_provider import DatabaseInfo, PostGresProvider +from .models import ImageTag, ImageLabel, ImageTagState, AnnotatedLabel, Tag, ImageInfo, PredictionLabel, TrainingSession + +class ImageTagDataAccess(object): + def __init__(self, db_provider): + self._db_provider = db_provider + + def test_connection(self): + conn = self._db_provider.get_connection() + cursor = conn.cursor() + cursor.execute('select * from tagstate') + row = cursor.fetchone() + logging.info('') + while row: + logging.info(str(row[0]) + " " + str(row[1])) + row = cursor.fetchone() + + def create_user(self,user_name): + user_id = -1 + if not user_name: + raise ArgumentException("Parameter cannot be an empty string") + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("WITH existingUser AS ( " + "SELECT Userid,UserName FROM User_Info " + "WHERE username = %s), " + "data(user_name) AS (values (%s)), " + "newUser AS ( " + "INSERT INTO User_Info (UserName) " + "SELECT d.user_name FROM data d " + "WHERE NOT EXISTS (select 1 FROM User_Info u WHERE u.UserName = d.user_name) " + "RETURNING userid,username) " + "SELECT userid,username FROM newUser " + "UNION ALL " + "SELECT userid,username FROM existingUser") + cursor.execute(query,(user_name,user_name,)) + user_id = cursor.fetchone()[0] + conn.commit() + finally: cursor.close() + except Exception as e: + logging.error("An error occured creating a user: {0}".format(e)) + raise + finally: conn.close() + return user_id + + def 
get_images_for_tagging(self, number_of_images, user_id): + if number_of_images <= 0: + raise ArgumentException("Parameter must be greater than zero") + + selected_images_to_tag = {} + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("SELECT b.ImageId, b.ImageLocation, a.TagStateId FROM Image_Tagging_State a " + "JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId IN ({1}, {2}) order by " + "a.createddtim DESC limit {0}") + cursor.execute(query.format(number_of_images, ImageTagState.READY_TO_TAG, ImageTagState.INCOMPLETE_TAG)) + for row in cursor: + logging.debug('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2])) + selected_images_to_tag[row[0]] = str(row[1]) + self._update_images(selected_images_to_tag,ImageTagState.TAG_IN_PROGRESS, user_id, conn) + finally: + cursor.close() + except Exception as e: + logging.error("An errors occured getting images: {0}".format(e)) + raise + finally: + conn.close() + return selected_images_to_tag + + def add_new_images(self,list_of_image_infos, user_id): + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + url_to_image_id_map = {} + if(len(list_of_image_infos) > 0): + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + for img in list(list_of_image_infos): + query = ("INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width,CreatedByUser) " + "VALUES (%s,%s,%s,%s,%s) RETURNING ImageId;") + cursor.execute(query,(img.image_name,img.image_location,img.height,img.width,user_id)) + new_img_id = cursor.fetchone()[0] + url_to_image_id_map[img.image_location] = new_img_id + conn.commit() + finally: cursor.close() + logging.debug("Inserted {0} images to the DB".format(len(url_to_image_id_map))) + except Exception as e: + logging.error("An errors occured getting image ids: {0}".format(e)) + raise + finally: conn.close() + return url_to_image_id_map + + def get_images_by_tag_status(self, tag_status, limit=None): + images_by_tag_status = {} + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + tags = '' + for id in tag_status: + tags += str(id) + ',' + tags = tags[:-1] + query = ("SELECT b.ImageId, b.ImageLocation, a.TagStateId FROM Image_Tagging_State a " + "JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId IN ({0}) order by " + "a.createddtim DESC") + if limit: + query += " limit {1}" + cursor.execute(query.format(tags, limit)) + for row in cursor: + logging.debug('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2])) + images_by_tag_status[row[0]] = str(row[1]) + finally: + cursor.close() + except Exception as e: + logging.error("An errors occured getting ready to tag images: {0}".format(e)) + raise + finally: + conn.close() + return images_by_tag_status + + def get_image_info_for_image_ids(self, image_ids): + if not image_ids: + return list() + + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + ids = '' + for id in image_ids: + ids += str(id) + ',' + ids = ids[:-1] + query = ("select a.imageid, a.originalimagename, a.imagelocation, a.height, a.width, a.createdbyuser, b.TagStateId from image_info a LEFT JOIN Image_Tagging_State b ON a.imageid = b.imageid where a.imageid IN ({0});") + cursor.execute(query.format(ids)) + logging.debug("Got image info back for image_id={}".format(image_ids)) + + images_info = [] + for row in cursor: + info = {} + info['height'] = row[3] + info['width'] = row[4] + 
info['name'] = row[1] + info['location'] = row[2] + info['id'] = row[0] + info['tagstate'] = row[6] + images_info.append(info) + finally: + cursor.close() + except Exception as e: + logging.error("An error occurred getting image tags {0}".format(e)) + raise + finally: + conn.close() + return list(images_info) + + def checkout_images(self, image_count, user_id): + if type(image_count) is not int: + raise TypeError('image_count must be an integer') + image_id_to_image_labels = {} + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("with pl AS ( " + "SELECT p.*, ci.classificationname " + "FROM prediction_labels p " + "join classification_info ci on ci.classificationid = p.classificationid " + "WHERE trainingid = (select MAX(trainingid) From training_info) " + "), " + "its AS ( " + "SELECT s.imageid, ts.tagstatename, i.imagelocation,i.height,i.width " + "FROM image_tagging_state s " + "join image_info i on i.imageid = s.imageid " + "join tag_state ts on ts.tagstateid = s.tagstateid " + "WHERE s.tagstateid in ({0},{1}) LIMIT {2} " + ") " + "select " + "its.imageid, " + "its.imagelocation, " + "pl.classificationid, " + "pl.classificationname, " + "pl.x_min, " + "pl.x_max, " + "pl.y_min, " + "pl.y_max, " + "its.height, " + "its.width, " + "pl.boxconfidence, " + "pl.imageconfidence, " + "its.tagstatename " + "FROM its " + "left outer join pl on its.imageid = pl.imageid") + cursor.execute(query.format(ImageTagState.READY_TO_TAG, ImageTagState.INCOMPLETE_TAG, image_count)) + + logging.debug("Got image tags back for image_count={0}".format(image_count)) + + for row in cursor: + image_tag = {} + # Handle the incomplete case + if row[4] and row[5] and row[6] and row[7]: + image_tag = ImageTag(row[0], float(row[4]), float(row[5]), float(row[6]), float(row[7]), row[3]) + if row[0] not in image_id_to_image_labels: + image_label = ImageLabel(row[0], row[1], row[8], row[9], [image_tag]) + image_id_to_image_labels[row[0]] = image_label + else: + image_id_to_image_labels[row[0]].labels.append(image_tag) + + logging.debug("Checked out images: " + str(image_id_to_image_labels)) + images_ids_to_update = list(image_id_to_image_labels.keys()) + self._update_images(images_ids_to_update, ImageTagState.TAG_IN_PROGRESS, user_id, conn) + finally: + cursor.close() + except Exception as e: + logging.error("An error occurred checking out {0} images: {1}".format(image_count, e)) + raise + finally: + conn.close() + return list(image_id_to_image_labels.values()) + + + def reset_stale_checkedout_images(self): + stale_image_ids = [] + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("SELECT imageid, tagstateid, modifieddtim " + "FROM image_tagging_state " + "WHERE tagstateid = 2 AND modifieddtim < now() - interval '3 day' " + "order by modifieddtim ASC;") + cursor.execute(query.format(ImageTagState.TAG_IN_PROGRESS)) + for row in cursor: + logging.debug('Image Id: {0} \t\ttagstateid: {1} \t\tmodifieddtim: {2}'.format(row[0], row[1], row[2])) + stale_image_ids.append(row[0]) + chron_user_id = self.create_user('chron_job_user') + self._update_images(stale_image_ids,ImageTagState.INCOMPLETE_TAG, chron_user_id, conn) + finally: + cursor.close() + except Exception as e: + logging.error("An error occured while returning stale checkedout images to incomplete tagging state: {0}".format(e)) + raise + finally: + conn.close() + return stale_image_ids + + + def get_existing_classifications(self): + try: + conn = self._db_provider.get_connection() + try: + 
cursor = conn.cursor() + query = "SELECT classificationname from classification_info order by classificationname asc" + cursor.execute(query) + + classification_set = set() + for row in cursor: + logging.debug(row) + classification_set.add(row[0]) + logging.debug("Got back {0} classifications existing in db.".format(len(classification_set))) + finally: + cursor.close() + except Exception as e: + logging.error("An error occurred getting classifications from DB: {0}".format(e)) + raise + finally: + conn.close() + return list(classification_set) + + def update_incomplete_images(self, list_of_image_ids, user_id): + #TODO: Make sure the image ids are in a TAG_IN_PROGRESS state + self._update_images(list_of_image_ids,ImageTagState.INCOMPLETE_TAG,user_id, self._db_provider.get_connection()) + logging.debug("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.INCOMPLETE_TAG.name)) + + def update_completed_untagged_images(self,list_of_image_ids, user_id): + #TODO: Make sure the image ids are in a TAG_IN_PROGRESS state + self._update_images(list_of_image_ids,ImageTagState.COMPLETED_TAG,user_id, self._db_provider.get_connection()) + logging.debug("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.COMPLETED_TAG.name)) + + def _update_images(self, list_of_image_ids, new_image_tag_state, user_id, conn): + if not isinstance(new_image_tag_state, ImageTagState): + raise TypeError('new_image_tag_state must be an instance of Direction Enum') + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + if not conn: + conn = self._db_provider.get_connection() + + try: + if(len(list_of_image_ids) > 0): + cursor = conn.cursor() + try: + image_ids_as_strings = [str(i) for i in list_of_image_ids] + images_to_update = '{0}'.format(', '.join(image_ids_as_strings)) + # TODO: find another way to do string subsitution that doesn't break this query + query = "UPDATE Image_Tagging_State SET TagStateId = {0}, ModifiedByUser = {2}, ModifiedDtim = now() WHERE ImageId IN ({1})" + cursor.execute(query.format(new_image_tag_state,images_to_update,user_id)) + conn.commit() + finally: cursor.close() + else: + logging.debug("No images to update") + except Exception as e: + logging.error("An errors occured updating images: {0}".format(e)) + raise + + def update_image_urls(self,image_id_to_url_map, user_id): + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + if(len(image_id_to_url_map.items())): + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + for image_id, new_url in image_id_to_url_map.items(): + cursor = conn.cursor() + query = "UPDATE Image_Info SET ImageLocation = '{0}', ModifiedDtim = now() WHERE ImageId = {1}" + cursor.execute(query.format(new_url,image_id)) + conn.commit() + logging.debug("Updated ImageId: {0} to new ImageLocation: {1}".format(image_id,new_url)) + self._update_images([image_id],ImageTagState.READY_TO_TAG, user_id,conn) + logging.debug("ImageId: {0} to has a new state: {1}".format(image_id,ImageTagState.READY_TO_TAG.name)) + finally: cursor.close() + except Exception as e: + logging.error("An errors occured updating image urls: {0}".format(e)) + raise + finally: conn.close() + + def get_classification_map(self, class_names: set, user_id: int) -> dict: + class_to_id = {} + if not class_names: + raise ValueError("Classification names must be present") + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("WITH sc AS ( 
" + "SELECT classificationid, classificationname FROM classification_info " + "WHERE classificationname in ({0})), " + "data(class_name) AS (values {1}), " + "ci AS ( " + "INSERT INTO classification_info (ClassificationName) " + "SELECT d.class_name FROM data d " + "WHERE NOT EXISTS (select 1 FROM classification_info c WHERE c.classificationname = d.class_name) " + "RETURNING classificationid,classificationname) " + "SELECT classificationid,classificationname FROM ci " + "UNION ALL " + "SELECT classificationid,classificationname FROM sc") + class_names_where = "'{0}'".format("', '".join(class_names)) + class_names_value = ", ".join("('{0}')".format(class_name) for class_name in class_names) + query = query.format(class_names_where,class_names_value) + cursor.execute(query) + conn.commit() + for row in cursor: + logging.debug(row) + class_to_id[row[1]] = int(row[0]) + finally: cursor.close() + except Exception as e: + logging.error("An errors occured upserting classification names: {0}".format(e)) + raise + finally: conn.close() + return class_to_id + + def update_tagged_images_v2(self, annotated_labels: list, user_id: int): + if(not annotated_labels): + return + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + labels_length = len(annotated_labels) + all_image_ids = list(l.image_id for l in annotated_labels) + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = "INSERT INTO Annotated_Labels(ImageId,ClassificationId,X_Min,X_Max,Y_Min,Y_Max,CreatedByUser) VALUES " + #Build query so we can insert all rows at once + for i in range(labels_length): + label = annotated_labels[i] + query+="({0},{1},{2},{3},{4},{5},{6}) ".format(label.image_id,label.classification_id, + label.x_min,label.x_max,label.y_min,label.y_max,user_id) + if i != labels_length-1: query+="," + cursor.execute(query) + self._update_images(all_image_ids,ImageTagState.COMPLETED_TAG,user_id,conn) + conn.commit() + #logging.debug("Updated status for {0} images".format(len(all_image_ids))) + finally: cursor.close() + except Exception as e: + logging.error("An errors occured updating tagged image: {0}".format(e)) + raise + finally: conn.close() + + def convert_to_annotated_label(self, image_tags: list, class_map: dict): + annotated_labels = [] + for img_tag in image_tags: + for class_name in img_tag.classification_names: + annotated_labels.append(AnnotatedLabel(img_tag.image_id,class_map[class_name], + img_tag.x_min,img_tag.x_max,img_tag.y_min,img_tag.y_max)) + return annotated_labels + + def add_prediction_labels(self, prediction_labels: list, training_id: int): + if(not prediction_labels): + return + + if type(training_id) is not int: + raise TypeError('training id must be an integer') + + labels_length = len(prediction_labels) + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = "INSERT INTO Prediction_Labels(TrainingId,ImageId,ClassificationId,X_Min,X_Max,Y_Min,Y_Max,BoxConfidence,ImageConfidence) VALUES " + #Build query so we can insert all rows at once + for i in range(labels_length): + label = prediction_labels[i] + query+="({0},{1},{2},{3},{4},{5},{6},{7},{8}) ".format(training_id,label.image_id,label.classification_id, + label.x_min,label.x_max,label.y_min,label.y_max, + label.box_confidence,label.image_confidence) + if i != labels_length-1: query+="," + cursor.execute(query) + #TODO: Update some sort of training status table? 
+ #self._update_training_status(training_id,conn) + conn.commit() + finally: cursor.close() + logging.debug('Inserted {0} predictions for training id {1}'.format(labels_length, training_id)) + except Exception as e: + logging.error("An errors occured updating tagged image: {0}".format(e)) + raise + finally: conn.close() + + def add_training_session(self, training: TrainingSession, user_id: int): + training_id = -1 + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("WITH t AS ( " + "INSERT INTO Training_Info (TrainingDescription, ModelLocation, ClassPerfAvg, CreatedByUser) " + "VALUES ('{}','{}',{},{}) RETURNING TrainingId), " + "p AS (INSERT INTO Class_Performance (TrainingId,ClassificationId,AvgPerf) " + "VALUES ") + query = query.format(training.description,training.model_url,training.avg_perf,user_id) + + # Append multiple TrainingId, ClassificationId and Performance values to above query + # Comma is more rows, closing parenthesis is on the last row + num_of_classes = len(training.class_perf) + for index, classId in enumerate(training.class_perf): + query += "((SELECT t.TrainingId FROM t), {}, {}) ".format(classId,training.class_perf[classId]) + if index != num_of_classes - 1: + query += ", " + elif index == num_of_classes - 1: + query += ") " + # Finally appending a query to return the new training id + query += "SELECT t.TrainingId FROM t" + cursor.execute(query) + training_id = cursor.fetchone()[0] + conn.commit() + finally: cursor.close() + logging.debug('Created training session with id {} for user id {}'.format(training_id,user_id)) + except Exception as e: + logging.error('An error occured saving the training session: {}'.format(e)) + raise + finally: conn.close() + return training_id + + # In practice we won't be getting multiple class names per bounding box however + # VOTT supports this. If multple class names per boounding box is common we can get more + # efficient with the nesting to avoid dupe bounding boxes per image + def get_labels(self): + id_to_imagelabels = {} + conn = None + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("SELECT d.imageid, d.imagelocation, d.height, d.width, " + "c.classificationname, x_min, x_max, y_min, y_max " + "FROM Annotated_Labels a " + "inner join classification_info c on a.classificationid = c.classificationid " + "inner join image_info d on d.imageid = a.imageid ") + cursor.execute(query) + for row in cursor: + image_id = row[0] + tag = Tag(row[4],float(row[5]),float(row[6]),float(row[7]),float(row[8])) + if image_id in id_to_imagelabels: + id_to_imagelabels[image_id].labels.append(tag) + else: + img_label = ImageLabel(image_id,row[1],row[2],row[3],[tag]) + id_to_imagelabels[image_id] = img_label + + logging.debug("Found labels for {0} images".format(len(id_to_imagelabels))) + finally: + cursor.close() + except Exception as e: + logging.error("An error occurred getting labels: {0}".format(e)) + raise + finally: + conn.close() + return list(id_to_imagelabels.values()) + +class ArgumentException(Exception): + pass + +def main(): + ################################################################# + # This main method is an example of how to use some of + # the ImageTagDataAccess methods. 
For instance: + # Creating a User + # Onboarding of new images + # Checking in images been tagged + ################################################################# + + # import sys + # import os + # sys.path.append("..") + # sys.path.append(os.path.abspath('db_provider')) + # from db_provider import DatabaseInfo, PostGresProvider + + #Replace me for testing + db_config = DatabaseInfo("","","","") + data_access = ImageTagDataAccess(PostGresProvider(db_config)) + user_id = data_access.create_user(getpass.getuser()) + logging.info("The user id for '{0}' is {1}".format(getpass.getuser(),user_id)) + + #img_labels = data_access.get_labels() + + simulate_onboarding = False + if simulate_onboarding: + list_of_image_infos = generate_test_image_infos(5) + url_to_image_id_map = data_access.add_new_images(list_of_image_infos,user_id) + + image_ids = list(url_to_image_id_map.values()) + #Skip extra stuff and just put the images to expected state for testing + data_access._update_images(image_ids,ImageTagState.READY_TO_TAG, user_id,None) + + simulate_tagging = False + if simulate_tagging: + image_tags = generate_test_image_tags(image_ids,4,4) + + all_class_name_lists = (list(it.classification_names for it in image_tags)) + unique_class_names = set(x for l in all_class_name_lists for x in l) + print(len(unique_class_names)) + + #What the Labels API will do when POST http action occurs + class_map = data_access.get_classification_map(unique_class_names,user_id) + annotated_labels = data_access.convert_to_annotated_label(image_tags,class_map) + data_access.update_tagged_images_v2(annotated_labels,user_id) + + simulate_post_training = False + if simulate_post_training and simulate_tagging: + training_id = 1 + prediction_labels = generate_test_prediction_labels(training_id,image_ids, class_map) + data_access.add_prediction_labels(prediction_labels,training_id) + + +TestClassifications = ("maine coon","german shephard","goldfinch","mackerel","african elephant","rattlesnake") + +def generate_test_image_infos(count): + list_of_image_infos = [] + for i in range(count): + file_name = "{0}.jpg".format(id_generator(size=random.randint(4,10))) + image_location = "https://mock-storage.blob.core.windows.net/new-uploads/{0}".format(file_name) + img = ImageInfo(file_name,image_location,random.randint(100,600),random.randint(100,600)) + list_of_image_infos.append(img) + return list_of_image_infos + +def generate_test_image_tags(list_of_image_ids,max_tags_per_image,max_classifications_per_tag): + list_of_image_tags = [] + for image_id in list(list_of_image_ids): + tags_per_image = random.randint(1,max_tags_per_image) + for i in range(tags_per_image): + x_min = random.uniform(50,300) + x_max = random.uniform(x_min,300) + y_min = random.uniform(50,300) + y_max = random.uniform(y_min,300) + classifications_per_tag = random.randint(1,max_classifications_per_tag) + image_tag = ImageTag(image_id,x_min,x_max,y_min,y_max,random.sample(TestClassifications,classifications_per_tag)) + list_of_image_tags.append(image_tag) + return list_of_image_tags + +def generate_test_prediction_labels(training_id, list_of_image_ids,class_map: dict): + list_of_prediction_labels = [] + for image_id in list(list_of_image_ids): + tags_per_image = random.randint(1,3) + for i in range(tags_per_image): + x_min = random.uniform(50,300) + x_max = random.uniform(x_min,300) + y_min = random.uniform(50,300) + y_max = random.uniform(y_min,300) + image_conf = random.uniform(.5,1) + box_conf = random.uniform(image_conf,1) + class_name = 
random.choice(TestClassifications) + class_id = class_map[class_name] + prediction_label = PredictionLabel(training_id,image_id,class_id, + x_min,x_max,y_min,y_max,random.randint(100,600), + random.randint(100,600), box_conf,image_conf) + list_of_prediction_labels.append(prediction_label) + return list_of_prediction_labels + +def id_generator(size=6, chars=string.ascii_uppercase + string.digits): + return ''.join(random.choice(chars) for _ in range(size)) + +if __name__ == "__main__": + #Log to console when run locally + console = logging.StreamHandler() + log = logging.getLogger() + log.setLevel(logging.getLevelName('DEBUG')) + log.addHandler(console) + main() diff --git a/functions/pipeline/shared/db_access/models.py b/functions/pipeline/shared/db_access/models.py new file mode 100644 index 00000000..941b6228 --- /dev/null +++ b/functions/pipeline/shared/db_access/models.py @@ -0,0 +1,105 @@ +from enum import IntEnum, unique + +@unique +class ImageTagState(IntEnum): + NOT_READY = 0 + READY_TO_TAG = 1 + TAG_IN_PROGRESS = 2 + COMPLETED_TAG = 3 + INCOMPLETE_TAG = 4 + ABANDONED = 5 + +# An entity class for a VOTT image +class ImageInfo(object): + def __init__(self, image_name, image_location, height, width): + self.image_name = image_name + self.image_location = image_location + self.height = height + self.width = width + + +# Entity class for Tags stored in DB +class ImageTag(object): + def __init__(self, image_id, x_min, x_max, y_min, y_max, classification_names): + self.image_id = image_id + self.x_min = x_min + self.x_max = x_max + self.y_min = y_min + self.y_max = y_max + self.classification_names = classification_names + + @staticmethod + def fromJson(dictionary): + if dictionary.items(): + image_tag = ImageTag(dictionary["image_id"], dictionary["x_min"], dictionary["x_max"], dictionary["y_min"], dictionary["y_max"], dictionary["classification_names"]) + return image_tag + +#This class doesn't have box and image confidence because they are human curated labels +class AnnotatedLabel(object): + def __init__(self, image_id, classification_id, x_min, x_max, y_min, y_max): + self.image_id = image_id + self.x_min = x_min + self.x_max = x_max + self.y_min = y_min + self.y_max = y_max + self.classification_id = classification_id + + +class ImageLabel(object): + def __init__(self,image_id, imagelocation,image_height: int, image_width: int, labels: list, user_folder=None): + self.image_id = image_id + self.imagelocation = imagelocation + self.image_height = image_height + self.image_width = image_width + self.user_folder = user_folder + self.labels = labels + + @staticmethod + def fromJson(dictionary): + tags = [] + if (isinstance(dictionary["labels"], dict)): + tags = [ImageTag.fromJson(dictionary["labels"])] + elif (isinstance(dictionary["labels"], list)): + tags = [ImageTag.fromJson(label) for label in dictionary["labels"]] + + image_label = ImageLabel(dictionary["image_id"], dictionary["imagelocation"], dictionary["image_height"], dictionary["image_width"], tags, dictionary.get("user_folder")) + return image_label + + +class Tag(object): + def __init__(self,classificationname, x_min: float, x_max: float, y_min: float, y_max: float): + self.x_min = x_min + self.x_max = x_max + self.y_min = y_min + self.y_max = y_max + self.classificationname = classificationname + + def convert_to_relative(self, width, height): + self.x_min = self.x_min/width + self.x_max = self.x_max/width + self.y_min = self.y_min/height + self.y_max = self.y_max/height + + +class PredictionLabel(AnnotatedLabel): + def 
__init__(self, training_id, image_id, classification_id, x_min, x_max, y_min, y_max, + image_height, image_width, box_confidence=0, image_confidence= 0): + super().__init__(image_id, classification_id, x_min, x_max, y_min, y_max) + self.training_id = training_id + self.image_height = image_height + self.image_width = image_width + self.box_confidence = box_confidence + self.image_confidence = image_confidence + + def convert_to_absolute(self): + self.x_min = self.x_min*self.image_width + self.x_max = self.x_max*self.image_width + self.y_min = self.y_min*self.image_height + self.y_max = self.y_max*self.image_height + +class TrainingSession(object): + def __init__(self, description, model_url, avg_perf: float, class_perf: dict): + self.description = description + self.model_url = model_url + self.avg_perf = avg_perf + self.class_perf = class_perf diff --git a/functions/pipeline/shared/db_access/test_db_access_v2.py b/functions/pipeline/shared/db_access/test_db_access_v2.py new file mode 100644 index 00000000..f8f22a23 --- /dev/null +++ b/functions/pipeline/shared/db_access/test_db_access_v2.py @@ -0,0 +1,100 @@ +import unittest +from unittest.mock import patch +from unittest.mock import Mock + +from .db_access_v2 import( + ImageTagDataAccess, + ArgumentException, + ImageTagState, + generate_test_image_infos +# _update_images, +# create_user, +# get_image_ids_for_new_images, +# get_new_images +) + +class MockConnection: + def _mock_cursor(self): + self.fetchCount=5 + + def fetchone(): + if (self.fetchCount): + self.fetchCount = self.fetchCount-1 + return (["A","B"]) + return None + + def execute(query): + return + + test = Mock() + test.execute = execute + test.fetchone = fetchone + return test + + def cursor(self): + return self._mock_cursor() + +class MockDBProvider: + def __init__(self, fail = False): + self.fail = fail + + def get_connection(self): + if self.fail: + raise Exception + return MockConnection() + +class TestImageTagDataAccess(unittest.TestCase): + def test_connection(self): + print("Running...") + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.test_connection() + self.assertEqual(5, 5) + + def test_create_user_empty_string(self): + with self.assertRaises(ArgumentException): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.create_user('') + + def test_create_user_db_error(self): + with self.assertRaises(Exception): + data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + data_access.create_user('MyUserName') + + def test_update_image_bad_image_state(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access._update_images((),"I should be an enum",1,None) + + def test_update_image_db_error(self): + with self.assertRaises(Exception): + data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + data_access._update_images((),ImageTagState.READY_TO_TAG,1,None) + + def test_get_new_images_bad_request(self): + with self.assertRaises(ArgumentException): + data_access = ImageTagDataAccess(MockDBProvider()) + num_of_images = -5 + data_access.get_images_for_tagging(num_of_images, 5) + + def test_add_new_images_user_id_type_error(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.add_new_images((),"I should be an integer") + + def test_add_new_images_connection_error(self): + with self.assertRaises(Exception): + data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + data_access.add_new_images(generate_test_image_infos(5),10) + 
+ # def test_add_new_images_cursor_error(self): + # with self.assertRaises(Exception): + # data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + # data_access.add_new_images(generate_test_image_infos(5),10) + + def test_update_image_urls_user_id_type_error(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.update_image_urls((),"I should be an integer") + +if __name__ == '__main__': + unittest.main() diff --git a/functions/pipeline/shared/db_provider/__init__.py b/functions/pipeline/shared/db_provider/__init__.py new file mode 100644 index 00000000..54da8ce0 --- /dev/null +++ b/functions/pipeline/shared/db_provider/__init__.py @@ -0,0 +1 @@ +from .db_provider import DatabaseInfo, DBProvider, PostGresProvider, get_postgres_provider diff --git a/functions/pipeline/shared/db_provider/db_provider.py b/functions/pipeline/shared/db_provider/db_provider.py new file mode 100644 index 00000000..f6477bee --- /dev/null +++ b/functions/pipeline/shared/db_provider/db_provider.py @@ -0,0 +1,65 @@ +import pg8000 +import os + +# import pyodbc + +# Used for testing +default_db_host = "" +default_db_name = "" +default_db_user = "" +default_db_pass = "" + +def get_postgres_provider(): + return PostGresProvider(__get_database_info_from_env()) + + +def __get_database_info_from_env(): + return DatabaseInfo(os.getenv('DB_HOST', default_db_host), os.getenv('DB_NAME', default_db_name), + os.getenv('DB_USER', default_db_user), os.getenv('DB_PASS', default_db_pass)) + + +class DatabaseInfo(object): + def __init__(self, db_host_name, db_name, db_user_name, db_password): + self.db_host_name = db_host_name + self.db_name = db_name + self.db_user_name = db_user_name + self.db_password = db_password + + +class DBProvider(object): + def __new_connection(self, host_name, db_name, db_user, db_pass): pass + + def get_connection(self): pass + + def cursor(self): pass + + def execute(self, query): pass + + +class PostGresProvider(DBProvider): + + def __init__(self, database_info): + self.database_info = database_info + + def __new_connection(self, host_name, db_name, db_user, db_pass): + return pg8000.connect(db_user, host=host_name, unix_sock=None, port=5432, database=db_name, password=db_pass, + ssl=True, timeout=None, application_name=None) + + def get_connection(self): + # self.connection = + return self.__new_connection(self.database_info.db_host_name, self.database_info.db_name, + self.database_info.db_user_name, self.database_info.db_password) + + +''' +class MSSqlProvider(DBProvider): + DRIVER= '{ODBC Driver 17 for SQL Server}' + def __init__(self, database_info): + self.database_info = database_info + + def __new_connection(self,host_name,db_name,db_user,db_pass): + return pyodbc.connect('DRIVER='+self.DRIVER+';PORT=1433;SERVER='+host_name+';PORT=1443;DATABASE='+db_name+';UID='+db_user+';PWD='+ db_pass) + + def get_connection(self): + return self.__new_connection(self.database_info.db_host_name,self.database_info.db_name,self.database_info.db_user_name,self.database_info.db_password) +''' \ No newline at end of file diff --git a/functions/pipeline/shared/onboarding/__init__.py b/functions/pipeline/shared/onboarding/__init__.py new file mode 100644 index 00000000..dd1081a9 --- /dev/null +++ b/functions/pipeline/shared/onboarding/__init__.py @@ -0,0 +1,115 @@ +import os +import logging +from enum import Enum +from datetime import datetime +import time +import asyncio + +TIMEOUT_SECONDS = 1 + +class CopyStatus(Enum): + SUCCESS = "success", + PENDING = 
"pending", + ABORTED = "aborted", + FAILED = "failed", + TIMEOUT = "timeout" # custom status + +class DeleteStatus(Enum): + SUCCESS = "success", + PENDING = "pending", + ABORTED = "aborted", + FAILED = "failed", + TIMEOUT = "timeout" # custom status + +# Initiates copy of images from temporary to permanent storage, and checks the status of each operation. +# Returns two dictionaries, copy_succeeded_dict and copy_error_dict, in the format {sourceURL : destinationURL }. +def copy_images_to_permanent_storage(image_id_url_map, copy_source, copy_destination, blob_service): + copy_initiated_dict = {} # Dictionary of images for which copy was successfully initiated + copy_error_dict = {} # Dictionary of images for which some error/exception occurred + + # Create new blob names + for key, value in image_id_url_map.items(): + original_blob_url = key + # original_blob_name = original_blob_url.split("/")[-1] + file_extension = os.path.splitext(original_blob_url)[1] + image_id = value + new_blob_name = (str(image_id) + file_extension) + + # Create the destination blob URL + destination_blob_path = blob_service.make_blob_url(copy_destination, new_blob_name) + + # Copy blob from temp storage to permanent storage + logging.info("Now initiating copy of image from temporary to permanent storage...") + # Log source and destination paths for debugging + logging.info("Source path: " + original_blob_url) + logging.info("Destination path: " + destination_blob_path) + try: + blob_service.copy_blob(copy_destination, new_blob_name, original_blob_url) + logging.info("Done.") + # Add to list of items for which we need to check status if copy was initiated successfully + copy_initiated_dict[original_blob_url] = destination_blob_path + except Exception as e: + logging.error("ERROR: Exception thrown during copy attempt: " + str(e)) + copy_error_dict[original_blob_url] = destination_blob_path + + # Wait a few seconds before checking status + time.sleep(TIMEOUT_SECONDS) + + copy_succeeded_dict = {} # Dictionary of copy operations that were successful + + # Get copy status of each item. If status is succeeded, add to success list. Otherwise, add to error list. + for key, value in copy_initiated_dict.items(): + target_blob_properties = blob_service.get_blob_properties(copy_destination, value.split("/")[-1]) + copy_properties = target_blob_properties.properties.copy + # logging.info("Copy status of image" + value.split("/")[-1] + " is: " + copy_properties.status) # Debugging + # if copy_properties.status == CopyStatus.SUCCESS: # Note: Want to remove hard-coding, but this line does not work + if copy_properties.status == "success": + copy_succeeded_dict[key] = value + else: + copy_error_dict[key] = value + + # Debugging + # logging.info("copy_succeeded_dict:") + # for key, value in copy_succeeded_dict.items(): + # logging.info("Key: " + key + " Value: " + value) + # logging.info("copy_error_dict:") + # for key, value in copy_error_dict.items(): + # logging.info("Key: " + key + " Value: " + value) + + return copy_succeeded_dict, copy_error_dict + +# Initiates deletion of images from temporary storage, and then checks whether the images still exist in the container. +# Returns two dictionaries, delete_succeeded_dict and delete_error_dict, in the format {sourceURL : destinationURL }. 
+def delete_images_from_temp_storage(delete_images_dict, delete_location, blob_service): + delete_initiated_dict = {} # Dictionary of images for which delete was successfully initiated + delete_error_dict = {} # Dictionary of images for which some error/exception occurred + + # Delete blobs from container + for key, value in delete_images_dict.items(): + logging.info("Now initiating delete of image from temp storage...") + logging.info("Image to be deleted: " + key) + try: + blob_service.delete_blob(delete_location, key.split("/")[-1]) + logging.info("Done.") + # Add to list of items to check status if delete was initiated successfully + delete_initiated_dict[key] = value + except Exception as e: + logging.error("ERROR: Exception thrown during delete attempt: " + str(e)) + delete_error_dict[key] = value + + # Wait a few seconds before checking status + time.sleep(TIMEOUT_SECONDS) + + delete_succeeded_dict = {} # Dictionary of delete operations that were successful + + # List blobs in the source container. For each image in delete_initiated_dict, if the blob no longer exists, + # add to delete_succeeded_dict. If the blob still exists, add to delete_error_dict. + blob_list = blob_service.list_blobs(delete_location) + for key, value in delete_initiated_dict.items(): + blob_name = key.split('/')[-1] + if blob_name in blob_list: + delete_error_dict[key] = value + else: + delete_succeeded_dict[key] = value + + return delete_succeeded_dict, delete_error_dict \ No newline at end of file diff --git a/functions/pipeline/shared/storage_utils/__init__.py b/functions/pipeline/shared/storage_utils/__init__.py new file mode 100644 index 00000000..1f252a6a --- /dev/null +++ b/functions/pipeline/shared/storage_utils/__init__.py @@ -0,0 +1,43 @@ +import logging +import os +from urlpath import URL +from datetime import datetime, timedelta +from azure.storage.blob import BlockBlobService, BlobPermissions + +def get_signed_url_for_permstore_blob(permstore_url): + blob_url = URL(permstore_url) + + # create sas signature + blob_service = __get_perm_store_service() + sas_signature = blob_service.generate_blob_shared_access_signature( + os.getenv('DESTINATION_CONTAINER_NAME'), + blob_url.name, + BlobPermissions.READ, + datetime.utcnow() + timedelta(hours=1) + ) + + logging.debug("INFO: have sas signature {}".format(sas_signature)) + signed_url = blob_url.with_query(sas_signature) + return signed_url.as_uri() + + +def __get_perm_store_service(): + return BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), + account_key=os.getenv('STORAGE_ACCOUNT_KEY')) + + +def get_filepath_from_url(blob_url: URL, storage_container): + blob_uri = blob_url.path + return __remove_postfix(__remove_prefix(blob_uri, '/' + storage_container), '/' + blob_url.name) + + +def __remove_prefix(text, prefix): + if text.startswith(prefix): + return text[len(prefix):] + return text + + +def __remove_postfix(text, postfix): + if not text.endswith(postfix): + return text + return text[:len(text)-len(postfix)] \ No newline at end of file diff --git a/functions/pipeline/train/__init__.py b/functions/pipeline/train/__init__.py new file mode 100644 index 00000000..f98c86a8 --- /dev/null +++ b/functions/pipeline/train/__init__.py @@ -0,0 +1,54 @@ +import logging +import jsonpickle +from collections import namedtuple +import azure.functions as func +import json +from ..shared.db_provider import get_postgres_provider +from ..shared.db_access import ImageTagDataAccess, ImageTagState, PredictionLabel, TrainingSession + + +def main(req: 
func.HttpRequest) -> func.HttpResponse: + logging.info('Python HTTP trigger function processed a request.') + + user_name = req.params.get('userName') + + # setup response object + headers = { + "content-type": "application/json" + } + if not user_name: + return func.HttpResponse( + status_code=401, + headers=headers, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) + else: + try: + # DB configuration + data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) + + logging.debug("User '{0}' invoked train api".format(user_name)) + + logging.info("Method = " + str(req.method)) + if req.method == "GET": + return func.HttpResponse( + status_code=200, + headers=headers, + body=jsonpickle.encode("Not implemented",unpicklable=False) + ) + elif req.method == "POST": + payload = json.loads(req.get_body()) + logging.debug("Payload: {}".format(payload)) + payload_json = namedtuple('TrainingSession', payload.keys())(*payload.values()) + training_id = data_access.add_training_session(payload_json, user_id) + return func.HttpResponse( + status_code=201, + headers=headers, + body=str(training_id) + ) + except Exception as e: + return func.HttpResponse( + "exception:" + str(e), + status_code=500 + ) \ No newline at end of file diff --git a/functions/pipeline/train/function.json b/functions/pipeline/train/function.json new file mode 100644 index 00000000..a33d0f5d --- /dev/null +++ b/functions/pipeline/train/function.json @@ -0,0 +1,21 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": [ + "get", + "post" + ] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] + } + \ No newline at end of file diff --git a/functions/pipeline/train/host.json b/functions/pipeline/train/host.json new file mode 100644 index 00000000..1c8bb37a --- /dev/null +++ b/functions/pipeline/train/host.json @@ -0,0 +1,3 @@ +{ + "version": "2.0" + } \ No newline at end of file diff --git a/images/VOTT_animal.PNG b/images/VOTT_animal.PNG new file mode 100644 index 00000000..6e9b3be1 Binary files /dev/null and b/images/VOTT_animal.PNG differ diff --git a/images/init_predict.PNG b/images/init_predict.PNG new file mode 100644 index 00000000..62c26b72 Binary files /dev/null and b/images/init_predict.PNG differ diff --git a/init_pred_desription.md b/init_pred_desription.md new file mode 100644 index 00000000..70143965 --- /dev/null +++ b/init_pred_desription.md @@ -0,0 +1,76 @@ +# Guide to "initialization" predictions +Assuming you got an datatet containing many thousands of images -- how do you get started with labeling first +few hundreds images? +What about unblanced case when most of the pictures not have much going on? +If you just random sample pictures _blindly_ it make quite a few Active Learning cycles to set your model and +training set onto the right pass. + +## Let's get "metadata" about each image +We could use pretrained model that can detect decently few dozens or more object class to get idea what kind +of objects are on the images. The model might not provide super-accurate results however some of those might be +useful for more target image sampling. +For example if you dataset has common scenes of nature or city life than using model trained on [COCO dataset](https://github.com/amikelive/coco-labels/blob/master/coco-labels-paper.txt) +might give you an idea what images have objects that _resembles_ person, car, deer and so on. 
+And depending on your scenario, you might focus your initial labeling efforts on images that do or do not contain a particular class.
+
+![Flow](images/init_predict.PNG)
+
+## Settings in config.ini
+The following settings control which model is used for "initialization" predictions.
+ - init_model_name=faster_rcnn_resnet101_coco_2018_01_28
+ Model name to be used for predictions. The current code assumes it is a COCO-based model.
+ - init_pred_tf_url=http://download.tensorflow.org/models/object_detection/${init_model_name}.tar.gz
+ URL for downloading the model from the Tensorflow Object Detection model zoo.
+ - init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb
+ Location (on the DSVM) of the frozen inference graph used to produce "initialization" predictions.
+
+## Running the "initialization" predictions flow
+Once the config settings are in place (and the images are in blob storage), the user needs to do the following:
+- SSH to the DSVM and run the script that actually produces the predictions
+- provide the desired mapping (and merging) of detected classes to the classes of interest (more details below)
+- download the specified number of images to the client machine and review the tags
+
+*Produce predictions*
+SSH to the DSVM, activate the needed Tensorflow virtual environment if necessary, and run:
+ `. ./active_learning_init_pred.sh ../config.ini`
+ The output _init_totag*.csv_ contains the bounding boxes of all detected objects. It is probably worth spending
+ some time analyzing those results.
+
+ *Class mapping json*
+ Please refer to _sample_init_classes_map.json_ for reference.
+ First we specify that class "1" should be shown as class "person" in VOTT when the user reviews the labels.
+ We also want 60% of the images pulled for review to contain class "person":
+ `{`
+ `"initclass": "1", `
+ `"map": "person",`
+ `"balance": "0.6"`
+ `}`
+
+ Then we want to _merge_ several classes: "19" (horse) and "21" (cow) will both be displayed in VOTT as "animal".
+ `{`
+ `"initclass": "19",`
+ `"map": "animal",`
+ `"balance": "0.2"`
+ `},`
+ `{`
+ `"initclass": "21",`
+ `"map": "animal",`
+ `"balance": "0.2"`
+ `}`
+
+ This specifies that 20% of the reviewed images should contain each _animal_ class (40% in total).
+ We also explicitly request that images where no known COCO classes were detected are not included (balance "0" for the "NULL" class). Given that a COCO-based
+ model may miss quite a few objects, it is still good practice to review some of those.
+ The model might also detect classes that would clutter the images during review. For example, the dataset
+ may have basket images that are wrongly classified as a "vase". If we are interested in detecting neither
+ baskets nor vases, we may want to simply "drop" the bounding boxes for the "vase" class (class 86 in COCO):
+ ` "unmapclass":["64","86", "15"],`
+ Finally, for _everything else_ -- classes we are not sure what to do with at this stage but whose bounding boxes we still want to preserve --
+ we map them to a "default" class. The name of the "default" class can be set in the mapping json.
+
+ *Review predictions in VOTT*
+ On a client (tagger) machine, run the usual script to download images (the full mapping file is sketched below).
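+A complete mapping file assembled from the fragments above might look like the following sketch (it mirrors the _test/init_classes_map.json_ fixture added in this change; the class ids and balances are just the example values discussed here):
+```json
+{
+  "classmap": [
+    { "initclass": "1",    "map": "person", "balance": "0.6" },
+    { "initclass": "19",   "map": "animal", "balance": "0.2" },
+    { "initclass": "21",   "map": "animal", "balance": "0.2" },
+    { "initclass": "NULL", "map": "NULL",   "balance": "0"   }
+  ],
+  "unmapclass": ["64", "86", "15"],
+  "default_class": "default"
+}
+```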
Only difference is that you'd be providing + "class mapping json" as 3rd parameter: + ` D:\repo\active-learning-detect\tag>python download_vott_json.py 200 ..\config.ini ..\sample_init_classes_map.json` + + ![Flow](images/VOTT_animal.PNG) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..f977df3b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +azure-functions==1.0.0a5 +azure-functions-worker==1.0.0a6 +azure-storage-blob==1.4.0 +azure-storage-file==1.4.0 +azure-storage-queue==1.4.0 +grpcio==1.14.2 +grpcio-tools==1.14.2 +protobuf==3.6.1 +requests +six==1.11.0 +pg8000==1.12.3 +Pillow==5.3.0 +opencv_python +urlpath==1.1.4 +jsonpickle diff --git a/tag/download_vott_json.py b/tag/download_vott_json.py index 77cbf164..89e0d7b8 100644 --- a/tag/download_vott_json.py +++ b/tag/download_vott_json.py @@ -8,6 +8,21 @@ import random import colorsys import numpy as np +from io import StringIO +from math import isclose + +import re +import time +from azure.storage.blob import BlockBlobService +import sys +import os + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config +import blob_utils as butils CONFIDENCE_LOCATION = -1 TAG_CONFIDENCE_LOCATION = -2 @@ -20,167 +35,358 @@ # Should be equal to width_location TAG_ENDING_LOCATION = 7 -def make_vott_output(all_predictions, output_location, user_folders, image_loc, blob_credentials = None, +random.seed(42) + +def add_class_name(tag_names, name): + if name not in tag_names: + tag_names = tag_names + [name] + return tag_names + +def remove_class_name(tag_names, name): + if name in tag_names: + tag_names.remove(name) + return tag_names + +def add_bkg_class_name(tag_names): + return add_class_name(tag_names, "NULL") + +def remove_bkg_class_name(tag_names): + return remove_class_name(tag_names, "NULL") + +def get_image_loc(prediction, user_folders, image_loc): + if user_folders: + if image_loc == "": + image_loc = Path(prediction[0][FOLDER_LOCATION]).name + else: + image_loc = image_loc + "/" + Path(prediction[0][FOLDER_LOCATION]).name + + return image_loc + +def get_output_location(prediction, user_folders, output_location_param): + if user_folders: + folder_name = Path(prediction[0][FOLDER_LOCATION]).name + output_location = Path(output_location_param)/folder_name + else: + output_location = Path(output_location_param)/"Images" + return output_location + +def make_vott_output(all_predictions, output_location_param, user_folders, image_loc_param, blob_credentials = None, tag_names: List[str] = ["stamp"], tag_colors: List[str] = "#ed1010", max_tags_per_pixel=None): if max_tags_per_pixel is not None: max_tags_per_pixel = int(max_tags_per_pixel) - if user_folders: - folder_name = Path(all_predictions[0][0][FOLDER_LOCATION]).name - output_location = Path(output_location)/folder_name - else: - output_location = Path(output_location)/"Images" - output_location.mkdir(parents=True, exist_ok=True) + + tag_names = remove_bkg_class_name(tag_names) + + # The tag_colors list generates random colors for each tag. To ensure that these colors stand out / are easy to see on a picture, the colors are generated + # in the hls format, with the random numbers biased towards a high luminosity (>=.8) and saturation (>=.75). 
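+    # Colors are only generated here when the caller did not pass an explicit tag_colors list;
+    # the "NULL" background class was removed from tag_names above, so colors line up 1:1 with tags.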
+ if tag_colors is None: + tag_colors = ['#%02x%02x%02x' % (int(256*r), int(256*g), int(256*b)) for + r,g,b in [colorsys.hls_to_rgb(random.random(),0.8 + random.random()/5.0, 0.75 + random.random()/4.0) for _ in tag_names]] + using_blob_storage = blob_credentials is not None - if using_blob_storage: - blob_service, container_name = blob_credentials - else: - image_loc = Path(image_loc) - if user_folders: - if using_blob_storage: - if image_loc == "": - image_loc = Path(all_predictions[0][0][FOLDER_LOCATION]).name - else: - image_loc = image_loc + "/" + Path(all_predictions[0][0][FOLDER_LOCATION]).name - else: - image_loc = image_loc/all_predictions[0][0][FOLDER_LOCATION] - for prediction in all_predictions: + + dict_predictions_per_folder = {} #defaultdict(list) + i = 0 + n_err = 0 + for prediction in all_predictions[:]: + #print(i) + image_loc = get_image_loc(prediction, user_folders, image_loc_param) + output_location = get_output_location(prediction, user_folders, output_location_param) + if output_location not in dict_predictions_per_folder: + output_location.mkdir(parents=True, exist_ok=True) + dict_predictions_per_folder[output_location] = [] + print("Created dir ", str(output_location)) if using_blob_storage: + blob_dest = str(output_location / prediction[0][FILENAME_LOCATION]) if image_loc: - print(image_loc + "/" + prediction[0][FILENAME_LOCATION]) - blob_service.get_blob_to_path(container_name, image_loc + "/" + prediction[0][FILENAME_LOCATION], - str(output_location/prediction[0][FILENAME_LOCATION])) + blob_name = image_loc + "/" + prediction[0][FILENAME_LOCATION] else: - print(prediction[0][FILENAME_LOCATION]) - blob_service.get_blob_to_path(container_name, prediction[0][FILENAME_LOCATION], - str(output_location/prediction[0][FILENAME_LOCATION])) - else: - shutil.copy(str(image_loc/prediction[0][FILENAME_LOCATION]), str(output_location)) - all_predictions.sort(key=lambda x: x[0][FILENAME_LOCATION]) - dirjson = {} - dirjson["frames"] = {} - for i, predictions in enumerate(all_predictions): - all_frames = [] - set_predictions = defaultdict(list) - if max_tags_per_pixel is None: - for prediction in predictions: - x_1, x_2, y_1, y_2, height, width = map(float, prediction[TAG_STARTING_LOCATION:TAG_ENDING_LOCATION+1]) - if prediction[TAG_LOCATION]!="NULL" and (x_1,x_2,y_1,y_2)!=(0,0,0,0): - x_1 = int(x_1*width) - x_2 = int(x_2*width) - y_1 = int(y_1*height) - y_2 = int(y_2*height) - set_predictions[(x_1, x_2, y_1, y_2, height, width)].append(prediction[TAG_LOCATION]) + blob_name = prediction[0][FILENAME_LOCATION] + + if not butils.attempt_get_blob(blob_credentials, blob_name, blob_dest): + all_predictions.remove(prediction) + n_err = n_err + 1 + continue; else: - if predictions: - num_tags = np.zeros((int(predictions[0][HEIGHT_LOCATION]),int(predictions[0][WIDTH_LOCATION])), dtype=int) - for prediction in sorted(predictions, key=lambda x: float(x[TAG_CONFIDENCE_LOCATION]), reverse=True): + shutil.copy(os.path.join(image_loc, prediction[0][FILENAME_LOCATION]), str(output_location)) + + dict_predictions_per_folder[output_location].append(prediction) + i = i + 1 + + print("Dowloaded {0} files. Number of errors: {1}".format(i, n_err)) + +#TBD: enum through dict and make json per folder! 
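+    # Build one VOTT-style "frames" JSON per destination folder from the predictions
+    # that were just downloaded or copied into it.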
+ for output_location, folder_predictions in dict_predictions_per_folder.items(): + folder_predictions.sort(key=lambda x: x[0][FILENAME_LOCATION]) + dirjson = {} + dirjson["frames"] = {} + for i, predictions in enumerate(folder_predictions): + all_frames = [] + file_name = "" + set_predictions = defaultdict(list) + if max_tags_per_pixel is None: + for prediction in predictions: x_1, x_2, y_1, y_2, height, width = map(float, prediction[TAG_STARTING_LOCATION:TAG_ENDING_LOCATION+1]) if prediction[TAG_LOCATION]!="NULL" and (x_1,x_2,y_1,y_2)!=(0,0,0,0): x_1 = int(x_1*width) x_2 = int(x_2*width) y_1 = int(y_1*height) y_2 = int(y_2*height) - if np.amax(num_tags[y_1:y_2, x_1:x_2]) total_rows: + num_rows = total_rows + if is_largest: + top = nlargest(num_rows, arr_image_data, + key=lambda x: float(x[0][CONFIDENCE_LOCATION])) + else: + top = nsmallest(num_rows, arr_image_data, + key=lambda x: float(x[0][CONFIDENCE_LOCATION])) + + return top + +def prepare_per_class_dict(all_files_per_folder, class_balances_cnt, tag_names): + #result = {} + result = defaultdict(list) + for k, v in all_files_per_folder.items(): + v_arr = np.array(v) + classes = v_arr[:, TAG_LOCATION] + for i in range(class_balances_cnt): + class_i = tag_names[i] + if class_i in classes: + result[class_i].append(v) + + return result + + +def parse_class_balance_setting(config_value, expected_cnt): + print("Ideal class balance (from config):", config_value) + if config_value is None: + return None + arr_np = np.genfromtxt(StringIO(config_value), dtype=float, delimiter=',', loose=True) + # check f there were non valid numbers + if np.isnan(arr_np.any()): + print("Found NaNs in ideal balance settings:", config_value) + return None + else: + if (arr_np.size != expected_cnt): + print("Size of ideal balance settings {0} is {1}. 
Expected {2}".format(arr_np.size, arr_np, expected_cnt)) + return None + + s = np.sum(arr_np) + if isclose(s, 1, abs_tol=0.01): + return arr_np + else: + print("Sum of balance settings {0} should add up to 1: {1}".format(config_value, s) ) + + +def filter_top(top, unmapclass_list, tag_names, class_map_dict, default_class): + for im in top: + for obj in im[:]: + obj_init_class = obj[TAG_LOCATION] + # remove bboxes for classes we are not interested in + if obj_init_class in unmapclass_list: + im.remove(obj) + # assign new name to class + if obj_init_class in tag_names: + obj[TAG_LOCATION] = class_map_dict[obj_init_class] + else: + obj[TAG_LOCATION] = default_class + + return top + +def get_top_rows(file_location_totag, num_rows, user_folders, pick_max, tag_names, ideal_class_balance, func_filter_top = None, *args): + with file_location_totag.open(mode='r') as file: reader = csv.reader(file) header = next(reader) - csv_list = list(reader) + totag_list = list(reader) + + all_files = defaultdict(lambda: defaultdict(list)) if user_folders: - all_files = defaultdict(lambda: defaultdict(list)) - for row in csv_list: + for row in totag_list: all_files[row[FOLDER_LOCATION]][row[0]].append(row) - all_lists = [] - if pick_max: - for folder_name in all_files: - all_lists.append(nlargest(num_rows, all_files[folder_name].values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION]))) - top_rows = max(all_lists,key=lambda x:sum(float(row[0][CONFIDENCE_LOCATION]) for row in x)) - else: - for folder_name in all_files: - all_lists.append(nsmallest(num_rows, all_files[folder_name].values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION]))) - top_rows = min(all_lists,key=lambda x:sum(float(row[0][CONFIDENCE_LOCATION]) for row in x)) else: - all_files = defaultdict(list) - for row in csv_list: - all_files[row[0]].append(row) - if pick_max: - top_rows = nlargest(num_rows, all_files.values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION])) + for row in totag_list: + all_files['default_folder'][row[0]].append(row) + selected_rows = [] + class_balances_cnt = 1 + if ideal_class_balance is not None: + class_balances_cnt = len(ideal_class_balance) + + + for folder_name in all_files: + if ideal_class_balance is not None: + all_files_per_class = prepare_per_class_dict(all_files[folder_name], class_balances_cnt, tag_names) + for i in range(class_balances_cnt): + num_rows_i = round(num_rows * float(ideal_class_balance[i])) + class_i = tag_names[i] + top = select_rows(all_files_per_class[class_i], num_rows_i, is_largest = pick_max) + + # drop values we selected from the dict + # the same image may have object from diff classes + for j in range(class_balances_cnt): + class_j = tag_names[j] + all_files_per_class[class_j] = [v for v in all_files_per_class[class_j] + if v not in top] + + if func_filter_top is not None: + top = func_filter_top(top, *args) #func_filter_top(top, unmapclass_list, tag_names, class_map_dict, default_class) + selected_rows = selected_rows + top else: - top_rows = nsmallest(num_rows, all_files.values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION])) - tagging_files = {row[0][0] for row in top_rows} - file_exists = (file_location/"tagging.csv").is_file() - with (file_location/"totag.csv").open(mode='w', newline='') as untagged, (file_location/"tagging.csv").open(mode='a', newline='') as tagging: - untagged_writer, tagging_writer = csv.writer(untagged), csv.writer(tagging) - untagged_writer.writerow(header) + top = select_rows(all_files[folder_name].values(), num_rows, is_largest = pick_max) + if func_filter_top 
is not None: + top = func_filter_top(top, args) + selected_rows = selected_rows + top + return selected_rows, totag_list, header + +def write_tag_csvs(selected_rows, totag_list, file_location_totag, file_location_togging, header): + selected_filenames = {row[0][FILENAME_LOCATION] for row in selected_rows} + file_exists = file_location_togging.is_file() + with file_location_totag.open(mode='w', newline='') as totag, file_location_togging.open(mode='a', newline='') as tagging: + totag_writer, tagging_writer = csv.writer(totag), csv.writer(tagging) + totag_writer.writerow(header) if not file_exists: tagging_writer.writerow(header) - for row in csv_list: - (tagging_writer if row[0] in tagging_files else untagged_writer).writerow(row) - return top_rows + for row in totag_list: + (tagging_writer if row[FILENAME_LOCATION] in selected_filenames else totag_writer).writerow(row) + + +def create_init_vott_json(file_location, num_rows, user_folders, pick_max, image_loc, output_location, blob_credentials, + tag_names, new_tag_names, max_tags_per_pixel=None, config_class_balance=None, colors=None, *args): + print("Creting VOTT json using pre-init classes") + file_location_init_totag = (file_location / "init_totag.csv") + file_location_tagging = (file_location / "tagging.csv") + file_location_totag = (file_location / "totag.csv") + selected_rows, totag_list, header = get_top_rows(file_location_init_totag, num_rows, user_folders, pick_max, tag_names, + config_class_balance, filter_top, *args) + + write_tag_csvs(selected_rows, totag_list, file_location_init_totag, file_location_tagging, header) + write_tag_csvs(selected_rows, totag_list, file_location_totag, file_location_tagging, header) + + default_class = args[-1] + new_tag_names = add_class_name(new_tag_names, default_class) + make_vott_output(selected_rows, output_location, user_folders, image_loc, blob_credentials=blob_credentials, + tag_names=new_tag_names, tag_colors=colors, max_tags_per_pixel=max_tags_per_pixel) -def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc, output_location, blob_credentials=None, tag_names = ["stamp"], max_tags_per_pixel=None): - all_files = get_top_rows(file_location, num_rows, user_folders, pick_max) - # The tag_colors list generates random colors for each tag. To ensure that these colors stand out / are easy to see on a picture, the colors are generated - # in the hls format, with the random numbers biased towards a high luminosity (>=.8) and saturation (>=.75). 
- make_vott_output(all_files, output_location, user_folders, image_loc, blob_credentials=blob_credentials, tag_names=tag_names, - tag_colors=['#%02x%02x%02x' % (int(256*r), int(256*g), int(256*b)) for - r,g,b in [colorsys.hls_to_rgb(random.random(),0.8 + random.random()/5.0, 0.75 + random.random()/4.0) for _ in tag_names]], max_tags_per_pixel=max_tags_per_pixel) + +def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc, output_location, blob_credentials=None, + tag_names = ["stamp"], max_tags_per_pixel=None, config_class_balance=None, colors = None): + file_location_totag = (file_location / "totag.csv") + file_location_togging = (file_location / "tagging.csv") + selected_rows, totag_list, header = get_top_rows(file_location_totag, num_rows, user_folders, pick_max, tag_names, config_class_balance) + + write_tag_csvs(selected_rows, totag_list, file_location_totag, file_location_togging, header) + + make_vott_output(selected_rows, output_location, user_folders, image_loc, blob_credentials=blob_credentials, + tag_names= tag_names, tag_colors=colors, max_tags_per_pixel=max_tags_per_pixel) if __name__ == "__main__": - #create_vott_json(r"C:\Users\t-yapand\Desktop\GAUCC1_1533070087147.csv",20, True, r"C:\Users\t-yapand\Desktop\GAUCC", r"C:\Users\t-yapand\Desktop\Output\GAUCC") - import re - import time - from azure.storage.blob import BlockBlobService - import sys - import os - # Allow us to import utils - config_dir = str(Path.cwd().parent / "utils") - if config_dir not in sys.path: - sys.path.append(config_dir) - from config import Config if len(sys.argv)<3: - raise ValueError("Need to specify number of images (first arg) and config file (second arg)") + raise ValueError("Need to specify number of images (first arg) and config file (second arg). 
Optionally provide psth to init_classes_map.json") config_file = Config.parse_file(sys.argv[2]) block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) container_name = config_file["label_container_name"] shutil.rmtree(config_file["tagging_location"], ignore_errors=True) csv_file_loc = Path(config_file["tagging_location"]) + #csv_file_loc = #Path("test_totag.csv") csv_file_loc.mkdir(parents=True, exist_ok=True) - file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'totag_(.*).csv', blob.name)] - block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"totag.csv")) - container_name = config_file["image_container_name"] - file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] - if file_date: - block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagging.csv")) - create_vott_json(csv_file_loc, int(sys.argv[1]), config_file["user_folders"]=="True", config_file["pick_max"]=="True", "", config_file["tagging_location"], - blob_credentials=(block_blob_service, container_name), tag_names=config_file["classes"].split(","), max_tags_per_pixel=config_file.get("max_tags_per_pixel",None)) + + if len(sys.argv)>3 and 'json' in sys.argv[3].lower(): + print("Using init flow and class mapping json") + json_fn = sys.argv[3] + with open(json_fn, "r") as read_file: + json_config = json.load(read_file) + classmap = json_config["classmap"] + ideal_balance_list = [] + new_tag_names = [] + init_tag_names = [] + class_map_dict = {} + for m in classmap: + ideal_balance_list.append(m['balance']) + new_tag_names.append(m['map']) + init_tag_names.append(m['initclass']) + class_map_dict[m['initclass']] = m['map'] + ideal_balance = ','.join(ideal_balance_list) + unmapclass_list = json_config["unmapclass"] + default_class = json_config["default_class"] + file_location_totag = csv_file_loc / "init_totag.csv" + new_tag_names = add_bkg_class_name(new_tag_names) + ideal_class_balance = parse_class_balance_setting(ideal_balance, len(new_tag_names)) + + file_date = [(blob.name, blob.properties.last_modified) for blob in + block_blob_service.list_blobs(container_name) if re.match(r'init_totag_(.*).csv', blob.name)] + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x: x[1])[0], + str(file_location_totag)) + + create_init_vott_json(csv_file_loc, int(sys.argv[1]), config_file["user_folders"] == "True", + config_file["pick_max"] == "True", "", + config_file["tagging_location"], (block_blob_service, config_file["image_container_name"]), + init_tag_names, + new_tag_names, + config_file.get("max_tags_per_pixel"), + ideal_class_balance, + None, #colors + unmapclass_list, init_tag_names, class_map_dict, default_class) + + else: + file_date = [(blob.name, blob.properties.last_modified) for blob in + block_blob_service.list_blobs(container_name) if re.match(r'totag_(.*).csv', blob.name)] + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x: x[1])[0], + str(csv_file_loc / "totag.csv")) + file_date = [(blob.name, blob.properties.last_modified) for blob in + block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] + ideal_class_balance = config_file["ideal_class_balance"].split(",") + if 
file_date: + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x: x[1])[0], + str(csv_file_loc / "tagging.csv")) + tag_names = add_bkg_class_name(config_file["classes"].split(",")) + ideal_class_balance = parse_class_balance_setting(config_file.get("ideal_class_balance"), len(tag_names)) + create_vott_json(csv_file_loc, int(sys.argv[1]), config_file["user_folders"]=="True", config_file["pick_max"]=="True", "", + config_file["tagging_location"], blob_credentials=(block_blob_service, config_file["image_container_name"]), + tag_names= tag_names, + max_tags_per_pixel=config_file.get("max_tags_per_pixel"), + config_class_balance = ideal_class_balance) container_name = config_file["label_container_name"] block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagging.csv")) block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("totag",int(time.time() * 1000),"csv"), str(csv_file_loc/"totag.csv")) diff --git a/tag/upload_vott_json.py b/tag/upload_vott_json.py index 284c152b..84925f92 100644 --- a/tag/upload_vott_json.py +++ b/tag/upload_vott_json.py @@ -29,7 +29,8 @@ def select_jsons(image_directory, user_folders, file_location): for json_file, sorted_images in zip(all_jsons, all_images): image_directory = Path(json_file.rsplit(".", 1)[0]).stem - json_file = json.load(open(json_file))["frames"] + with open(json_file, "r") as read_file: + json_file = json.load(read_file)["frames"] if (file_location/"tagging.csv").is_file(): with (file_location/"tagging.csv").open(mode='r') as file: @@ -49,8 +50,8 @@ def select_jsons(image_directory, user_folders, file_location): csv_writer.writerow(["filename","class","xmin","xmax","ymin","ymax","height","width"]) for index,(filename,true_height,true_width) in enumerate(sorted_images): tagged.add(filename) - if str(index) in json_file: - all_frames = json_file[str(index)] + if filename in json_file: + all_frames = json_file[filename] if all_frames: for cur_frame in all_frames: if cur_frame: @@ -78,7 +79,6 @@ def select_jsons(image_directory, user_folders, file_location): tagging_writer.writerow(row) if __name__ == "__main__": - #select_jsons(r"C:\Users\t-yapand\Desktop\GAUCC",r"C:\Users\t-yapand\Desktop\GAUCC.json",True,r"C:\Users\t-yapand\Desktop\GAUCC1_1533070038606.csv") from azure.storage.blob import BlockBlobService import sys import os @@ -95,11 +95,11 @@ def select_jsons(image_directory, user_folders, file_location): csv_file_loc = Path(config_file["tagging_location"]) file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)] if file_date: - block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], csv_file_loc/"tagged.csv") + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagged.csv")) file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] if file_date: - block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], csv_file_loc/"tagging.csv") + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagging.csv")) #TODO: Ensure this parses folder recursively when given tagging location. 
Remove the .json part select_jsons(config_file["tagging_location"],config_file["user_folders"]=="True",csv_file_loc) - block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagged",int(time.time() * 1000),"csv"), csv_file_loc/"tagged.csv") - block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), csv_file_loc/"tagging.csv") + block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagged",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagged.csv")) + block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagging.csv")) diff --git a/test/Images_source.json b/test/Images_source.json new file mode 100644 index 00000000..be3b3bd1 --- /dev/null +++ b/test/Images_source.json @@ -0,0 +1 @@ +{"framerate": "1", "frames": {"st1026.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 144, "x2": 174, "y1": 205, "y2": 254}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 142, "x2": 183, "y1": 213, "y2": 248}, {"height": 512.0, "id": 3, "name": 3, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 337, "x2": 361, "y1": 172, "y2": 202}], "st1578.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 293, "x2": 330, "y1": 188, "y2": 223}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 265, "x2": 293, "y1": 401, "y2": 438}], "st1611.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 317, "x2": 348, "y1": 440, "y2": 494}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 37, "x2": 55, "y1": 170, "y2": 189}], "st1840.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 292, "x2": 313, "y1": 134, "y2": 164}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 354, "x2": 377, "y1": 319, "y2": 342}, {"height": 512.0, "id": 3, "name": 3, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 60, "x2": 92, "y1": 392, "y2": 423}]}, "inputTags": "knot,defect", "scd": false, "suggestiontype": "track", "tag_colors": ["#e9f1fe", "#f3e9ff"]} diff --git a/test/Images_source_workdir90.json b/test/Images_source_workdir90.json new file mode 100644 index 00000000..64fc6ec3 --- /dev/null +++ b/test/Images_source_workdir90.json @@ -0,0 +1 @@ +{"framerate": "1", "frames": {"": [], "IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["1"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 338, "y1": 50, "y2": 258}, {"height": 480.0, "id": 2, "name": 2, "tags": ["1"], "type": "Rectangle", "width": 600.0, "x1": 0, "x2": 190, "y1": 3, "y2": 268}], "IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 600.0, "x1": 50, "x2": 233, "y1": 143, "y2": 293}], "IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["16"], "type": "Rectangle", "width": 600.0, "x1": 291, "x2": 568, "y1": 223, "y2": 455}], "IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["1"], "type": "Rectangle", "width": 
600.0, "x1": 164, "x2": 264, "y1": 69, "y2": 285}, {"height": 480.0, "id": 2, "name": 2, "tags": ["27"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 196, "y1": 102, "y2": 193}], "IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["16"], "type": "Rectangle", "width": 600.0, "x1": 0, "x2": 475, "y1": 127, "y2": 385}, {"height": 480.0, "id": 2, "name": 2, "tags": ["22"], "type": "Rectangle", "width": 600.0, "x1": 26, "x2": 463, "y1": 125, "y2": 388}], "IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 853.0, "x1": 21, "x2": 403, "y1": 14, "y2": 447}], "IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 853.0, "x1": 385, "x2": 644, "y1": 29, "y2": 327}], "IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["18"], "type": "Rectangle", "width": 853.0, "x1": 477, "x2": 710, "y1": 134, "y2": 307}]}, "inputTags": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90", "scd": false, "suggestiontype": "track", "tag_colors": ["#a5b2f6", "#f6fee6", "#f9a9dd", "#fbc1b4", "#fcbdb1", "#b4e8fb", "#ef9cfd", "#cdc3f8", "#f8c3d1", "#fff9f1", "#fae5fe", "#fcc3cd", "#fed9ff", "#e5f1fc", "#e3f7bf", "#d9f7ac", "#c4cefa", "#ecffb5", "#dcdffb", "#ccb0f9", "#fddddf", "#f1f0ff", "#d8f7a5", "#cfffaf", "#fcbcec", "#f7fffa", "#d3fbb7", "#e7ffd6", "#b0ffcf", "#adf0f4", "#fff2da", "#a7f8d2", "#ffcfd1", "#fc9eec", "#d8d4fb", "#abb7f9", "#fbfffe", "#e3fad2", "#fef3f9", "#dce1fb", "#ebd2fe", "#a2e7f7", "#fff9f8", "#f7c1f7", "#fffbfe", "#a3fdf5", "#daacfa", "#b5e9fe", "#b3fbda", "#ceb5f8", "#fddedf", "#aeeff7", "#d9fbda", "#dffba5", "#fcfff6", "#fcd3b5", "#e6ffa7", "#cbe8fe", "#ecb6f6", "#c8fce6", "#ebdeff", "#fae6c7", "#f8baed", "#cafaeb", "#fcfff8", "#fad7f4", "#ffefef", "#fef2f7", "#b5f9f4", "#ffd6c0", "#f3feeb", "#fbfffd", "#e6f5fc", "#fdfffc", "#e9f7fc", "#cee6ff", "#fffffc", "#f9fed8", "#dafea7", "#ecfed8", "#d7fdeb", "#fdb1cf", "#e4fdc4", "#c9fdbc", "#ffe0c8", "#f6aaac", "#fbfff9", "#f9c6eb", "#fee3fe", "#fbe0e3"]} \ No newline at end of file diff --git a/test/all_predictions.npy b/test/all_predictions.npy new file mode 100644 index 00000000..ed60f2a7 Binary files /dev/null and b/test/all_predictions.npy differ diff --git a/test/all_predictions_cow.npy b/test/all_predictions_cow.npy new file mode 100644 index 00000000..551bf0fd Binary files /dev/null and b/test/all_predictions_cow.npy differ diff --git a/test/board_images_png/st1026.png b/test/board_images_png/st1026.png new file mode 100644 index 00000000..a762f1b5 Binary files /dev/null and b/test/board_images_png/st1026.png differ diff --git a/test/board_images_png/st1194.png b/test/board_images_png/st1194.png new file mode 100644 index 00000000..a5ea2ff7 Binary files /dev/null and b/test/board_images_png/st1194.png differ diff --git a/test/board_images_png/st1578.png b/test/board_images_png/st1578.png new file mode 100644 index 00000000..51407df8 Binary files /dev/null and b/test/board_images_png/st1578.png differ diff --git a/test/board_images_png/st1611.png b/test/board_images_png/st1611.png new file mode 100644 index 00000000..be7e3ec1 Binary files /dev/null and 
b/test/board_images_png/st1611.png differ diff --git a/test/board_images_png/st1840.png b/test/board_images_png/st1840.png new file mode 100644 index 00000000..b6036430 Binary files /dev/null and b/test/board_images_png/st1840.png differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG new file mode 100644 index 00000000..7f3f37eb Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG new file mode 100644 index 00000000..111c1e48 Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG new file mode 100644 index 00000000..8210bdcc Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG new file mode 100644 index 00000000..189fb420 Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG new file mode 100644 index 00000000..a9af7abf Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG differ diff --git a/test/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG b/test/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG new file mode 100644 index 00000000..a2e7fd9c Binary files /dev/null and b/test/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG differ diff --git a/test/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG b/test/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG new file mode 100644 index 00000000..6c843ee3 Binary files /dev/null and b/test/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG differ diff --git a/test/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG b/test/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG new file mode 100644 index 00000000..b64c2264 Binary files /dev/null and b/test/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG differ diff --git a/test/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG b/test/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG new file mode 100644 index 00000000..9aa21a39 Binary files /dev/null and b/test/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG differ diff --git a/test/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG b/test/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG new file mode 100644 index 00000000..3ebd2697 Binary files /dev/null and b/test/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG differ diff --git a/test/camera_images_source.json b/test/camera_images_source.json new file mode 100644 index 00000000..23fcd092 --- /dev/null +++ b/test/camera_images_source.json @@ -0,0 +1 @@ +{"framerate": "1", 
"frames": {"IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["person"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 338, "y1": 50, "y2": 258}, {"height": 480.0, "id": 2, "name": 2, "tags": ["person"], "type": "Rectangle", "width": 600.0, "x1": 0, "x2": 190, "y1": 3, "y2": 268}], "IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["horse"], "type": "Rectangle", "width": 600.0, "x1": 50, "x2": 233, "y1": 143, "y2": 293}], "IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["person"], "type": "Rectangle", "width": 600.0, "x1": 164, "x2": 264, "y1": 69, "y2": 285}, {"height": 480.0, "id": 2, "name": 2, "tags": ["default"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 196, "y1": 102, "y2": 193}], "IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["horse"], "type": "Rectangle", "width": 853.0, "x1": 385, "x2": 644, "y1": 29, "y2": 327}]}, "inputTags": "person,horse,default", "scd": false, "suggestiontype": "track", "tag_colors": ["#e9f1fe", "#33BBFF", "#FFFF19"]} \ No newline at end of file diff --git a/test/init_class_get_rows_min.npy b/test/init_class_get_rows_min.npy new file mode 100644 index 00000000..0e833df1 Binary files /dev/null and b/test/init_class_get_rows_min.npy differ diff --git a/test/init_classes_map.json b/test/init_classes_map.json new file mode 100644 index 00000000..c19cca39 --- /dev/null +++ b/test/init_classes_map.json @@ -0,0 +1,21 @@ +{ + "classmap": [ + { + "initclass": "1", + "map": "person", + "balance": "0.8" + }, + { + "initclass": "19", + "map": "horse", + "balance": "0.2" + }, + { + "initclass": "NULL", + "map": "NULL", + "balance": "0" + } + ], + "unmapclass":["64","86", "15"], + "default_class":"default" +} \ No newline at end of file diff --git a/test/run_all_tests.py b/test/run_all_tests.py new file mode 100644 index 00000000..c66af653 --- /dev/null +++ b/test/run_all_tests.py @@ -0,0 +1,7 @@ +import unittest +loader = unittest.TestLoader() +start_dir = '.' 
+suite = loader.discover(start_dir) + +runner = unittest.TextTestRunner() +runner.run(suite) \ No newline at end of file diff --git a/test/test_create_init_predictions.py b/test/test_create_init_predictions.py new file mode 100644 index 00000000..3121eec8 --- /dev/null +++ b/test/test_create_init_predictions.py @@ -0,0 +1,86 @@ +import unittest +import shutil +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from collections import defaultdict +import filecmp + + + +# Allow us to import files from "train' + +train_dir = str(Path.cwd().parent / "train") +if train_dir not in sys.path: + sys.path.append(train_dir) +from create_predictions import get_suggestions, make_csv_output +from tf_detector import TFDetector +import six.moves.urllib as urllib +import tarfile +TEST_WORKDIR = "test_workdir" + +class CreateInitPredictionsTestCase(unittest.TestCase): + def setUp(self): + DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/' + MODEL_NAME = 'faster_rcnn_resnet101_coco_2018_01_28' # 'ssd_mobilenet_v1_coco_2017_11_17' + MODEL_FILE = MODEL_NAME + '.tar.gz' + url = DOWNLOAD_BASE + MODEL_FILE + MODEL_FILE_DST = os.path.join(TEST_WORKDIR, MODEL_FILE) + self.froz_graph = os.path.join(TEST_WORKDIR, MODEL_NAME, "frozen_inference_graph.pb") + if not os.path.exists(self.froz_graph): + if not os.path.exists(MODEL_FILE_DST): + print("Downloading model: ", url) + opener = urllib.request.URLopener() + opener.retrieve(url, MODEL_FILE_DST) + print("Unzipping: ", MODEL_FILE_DST) + tar_file = tarfile.open(MODEL_FILE_DST) + for file in tar_file.getmembers(): + file_name = os.path.basename(file.name) + if 'frozen_inference_graph.pb' in file_name: + tar_file.extract(file, TEST_WORKDIR) + + def tearDown(self): + if os.path.exists("untagged.csv"): + os.remove("untagged.csv") + if os.path.exists("tagged_preds.csv"): + os.remove("tagged_preds.csv") + #print("TBD tear down") + + def test_make_csv_output(self): + all_predictions = np.load('all_predictions_cow.npy') + basedir = Path("camera_images") + + CV2_COLOR_LOAD_FLAG = 1 + all_image_files = list(basedir.rglob("*.JPG")) + all_names = [] + all_names += [("camera_images", filename.name) for filename in all_image_files ] + + all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] + untagged_output = 'untagged.csv' + tagged_output = 'tagged_preds.csv' + already_tagged = defaultdict(set) + make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, + user_folders = True) + + self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_cow.csv'), True, "generated untagged.csv is correct") + + + def test_get_suggestions(self): + classesIDs = list(range(1, 91)) + classes = [str(x) for x in classesIDs] + cur_detector = TFDetector(classes, self.froz_graph) + image_dir = "test_workdir_init_pred" + untagged_output = 'untagged.csv' + tagged_output = 'tagged_preds.csv' + cur_tagged = None + cur_tagging = None + get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, + filetype="*.jpg", min_confidence=0.5, + user_folders=True) + self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_cow.csv'), True, "generated untagged.csv is correct") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_create_predictions.py b/test/test_create_predictions.py new file mode 100644 index 00000000..68bdd560 --- /dev/null +++ b/test/test_create_predictions.py @@ -0,0 +1,71 @@ +import unittest +import shutil 
+import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from collections import defaultdict +import filecmp +import six.moves.urllib as urllib + + + +# Allow us to import files from "train' + +train_dir = str(Path.cwd().parent / "train") +if train_dir not in sys.path: + sys.path.append(train_dir) +from create_predictions import get_suggestions, make_csv_output +from tf_detector import TFDetector + +class CreatePredictionsTestCase(unittest.TestCase): + + def setUp(self): + url = "https://olgaliakrepo.blob.core.windows.net/woodknots/model_knots.pb" + model_file = "model_knots.pb" + if not os.path.exists(model_file): + print("Downloading model: ", url) + opener = urllib.request.URLopener() + opener.retrieve(url, model_file) + + def tearDown(self): + if os.path.exists("untagged.csv"): + os.remove("untagged.csv") + if os.path.exists("tagged_preds.csv"): + os.remove("tagged_preds.csv") + + def test_make_csv_output(self): + all_predictions = np.load('all_predictions.npy') + basedir = Path("board_images_png") + N_IMAGES = 4 + CV2_COLOR_LOAD_FLAG = 1 + all_image_files = list(basedir.rglob("*.png"))[0:N_IMAGES] + all_names = [] + all_names += [("board_images_png", filename.name) for filename in all_image_files ] + + all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] + untagged_output = 'untagged.csv' + tagged_output = 'tagged_preds.csv' + already_tagged = defaultdict(set) + make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, + user_folders = True) + + self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source.csv'), True, "generated untagged.csv is correct") + + def test_get_suggestions(self): + classes = 'knot,defect' + cur_detector = TFDetector(classes.split(','), 'model_knots.pb') + image_dir = "test_workdir_train" + untagged_output = 'untagged.csv' + tagged_output = 'tagged_preds.csv' + cur_tagged = None + cur_tagging = None + get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, + filetype="*.png", min_confidence=0.5, + user_folders=True) + self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source.csv'), True, "generated untagged.csv is correct") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_download_vott_json.py b/test/test_download_vott_json.py new file mode 100644 index 00000000..6cc01e05 --- /dev/null +++ b/test/test_download_vott_json.py @@ -0,0 +1,269 @@ +import unittest +import shutil +import sys +import os +from pathlib import Path +import filecmp + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config + +tag_dir = str(Path.cwd().parent / "tag") +if tag_dir not in sys.path: + sys.path.append(tag_dir) +from download_vott_json import create_vott_json, get_top_rows, add_bkg_class_name, parse_class_balance_setting, make_vott_output + + +class DownloadVOTTJSONTestCase(unittest.TestCase): + def setUp(self): + self.config_file = Config.parse_file("testconfig.ini") + + self.tagging_location = self.config_file["tagging_location"] + "_test" + shutil.rmtree(self.tagging_location, ignore_errors=True) + self.totag_csv_file_loc = Path(self.config_file["tagging_location"])/"totag.csv" + + Path(self.config_file["tagging_location"]).mkdir(parents=True, exist_ok=True) + self. 
max_tags_per_pixel = self.config_file.get("max_tags_per_pixel") + self.tag_names = self.config_file["classes"].split(",") + self.user_folders = self.config_file["user_folders"] == "True" + self.pick_max = self.config_file["pick_max"] == "True" + + + + def tearDown(self): + shutil.rmtree(self.tagging_location, ignore_errors=True) + shutil.rmtree("Images", ignore_errors=True) + + if os.path.exists("totag.csv"): + os.remove("totag.csv") + + if os.path.exists("tagging.csv"): + os.remove("tagging.csv") + if os.path.exists("Images.json"): + os.remove("Images.json") + + print("Tear down") + + def test_get_top_rows(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + N_FILES = 3 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], + [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', 'board_images_png', '0.9884343', '0.96366304'], + ['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', 'board_images_png', '0.97863936', '0.96366304'], + ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', 'board_images_png', '0.96366304', '0.96366304']]] + + class_balance = "0.7,0.3,0" + + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_get_top_rows_no_folder(self): + # prepare file + shutil.copyfile("./totag_no_folder_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + N_FILES = 3 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', + '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', + '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', + '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', + '0.98448783', '0.98448783']], + [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', + '0.9884343', '0.96366304'], + ['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', + '0.97863936', '0.96366304'], + ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', + '0.96366304', '0.96366304']]] + + class_balance = "0.7,0.3,0" + user_folders = False + + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + 
self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_get_top_rows_empty_class_balance(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + N_FILES = 3 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], + [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], + ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] + class_balance = '' + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_get_top_rows_invalid_class_balance1(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + N_FILES = 3 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], + [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], + ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] + class_balance = 'Random' + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_get_top_rows_invalid_class_balance2(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + N_FILES = 3 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 
'board_images_png', '0.98448783', '0.98448783']], + [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], + ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] + + class_balance = '0.1, 0.2, 0.3' + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_get_top_rows_class_balance_min(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 3 + EXPECTED = [[['st1091.png', 'knot', '0.20989896', '0.251748', '0.34986168', '0.3921352', '512', '488', 'board_images_png', '0.99201256', '0.70161'], + ['st1091.png', 'knot', '0.696119', '0.7461088', '0.27078417', '0.33086362', '512', '488', 'board_images_png', '0.9827361', '0.70161'], + ['st1091.png', 'knot', '0.89531857', '0.93743694', '0.4605299', '0.5066802', '512', '488', 'board_images_png', '0.9794672', '0.70161'], + ['st1091.png', 'defect', '0.7629506', '1.0', '0.6205898', '0.67307687', '512', '488', 'board_images_png', '0.74762243', '0.70161'], + ['st1091.png', 'knot', '0.14214082', '0.247842', '0.7355515', '0.8967391', '512', '488', 'board_images_png', '0.7072498', '0.70161'], + ['st1091.png', 'defect', '0.0', '0.1281265', '0.55038965', '0.59755194', '512', '488', 'board_images_png', '0.70161', '0.70161']], + [['st1185.png', 'knot', '0.6978268', '0.7582275', '0.66821593', '0.7535644', '512', '488', 'board_images_png', '0.97257924', '0.7035888'], + ['st1185.png', 'defect', '0.35780182', '0.60781866', '0.27580062', '0.32093963', '512', '488', 'board_images_png', '0.9720861', '0.7035888'], + ['st1185.png', 'knot', '0.5183983', '0.57071316', '0.84764653', '0.91617334', '512', '488', 'board_images_png', '0.9241496', '0.7035888'], + ['st1185.png', 'knot', '0.55567926', '0.5904746', '0.51832056', '0.5461106', '512', '488', 'board_images_png', '0.7035888', '0.7035888']], + [['st1192.png', 'knot', '0.39846605', '0.45543727', '0.36765742', '0.4488806', '512', '488', 'board_images_png', '0.99612194', '0.7127546'], + ['st1192.png', 'defect', '0.07790943', '0.44866413', '0.5975798', '0.640683', '512', '488', 'board_images_png', '0.80447847', '0.7127546'], + ['st1192.png', 'defect', '0.47953823', '0.7499259', '0.5517361', '0.59940904', '512', '488', 'board_images_png', '0.7127546', '0.7127546']]] + + pick_max = False + class_balance = "0.7,0.3,0" + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + pick_max, tag_names, ideal_class_balance) + #self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_create_vott_json(self): + # prepare file + shutil.copyfile("./totag_source.csv", "./totag.csv") + + N_ROWS = 3 + N_FILES = 3 + FOLDER_NAME = "board_images_png" + class_balance = "0.7,0.3,0" + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + + create_vott_json(self.totag_csv_file_loc, N_ROWS, 
self.user_folders, + self.pick_max, "", + self.tagging_location, blob_credentials=None, + tag_names= tag_names, + max_tags_per_pixel=self. max_tags_per_pixel, + config_class_balance= ideal_class_balance) + + res_folder = os.path.join(self.tagging_location, FOLDER_NAME) + res_immages_cnt = sum([len(files) for r, d, files in os.walk(res_folder)]) + self.assertEqual(N_FILES, res_immages_cnt) + + def test_get_top_rows_with_bkg(self): + # prepare file + shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) + + N_ROWS = 5 + N_FILES = 5 + EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], + ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], + [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], + [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', + 'board_images_png', '0.99616516', '0.9843567'], + ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', + 'board_images_png', '0.9843567', '0.9843567']], + [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', 'board_images_png', '0.9884343', '0.96366304'], + ['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', 'board_images_png', '0.97863936', '0.96366304'], + ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', 'board_images_png', '0.96366304', '0.96366304']], + [['st1524.png', 'NULL', '0', '0', '0', '0', '512', '488', 'board_images_png', '0', '0.05']]] + + class_balance = "0.6, 0.29, 0.11" + tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) + + all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, + self.pick_max, tag_names, ideal_class_balance) + self.assertEqual(len(all_rows), N_FILES, 'number of rows') + self.assertEqual(all_rows, EXPECTED, 'raw values') + + def test_create_vott_json(self): + # prepare file + shutil.copyfile("./totag_source2.csv", "totag.csv") + + csv_file_loc = Path('.') + N_IMAGES = 4 + user_folders = False + pick_max = True + tagging_location = "." 
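+        # Output is written to the current directory so the generated Images.json can be
+        # compared against the Images_source.json fixture below.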
+ tag_names = add_bkg_class_name(self.tag_names) + ideal_class_balance = parse_class_balance_setting(None, len(tag_names)) + create_vott_json(csv_file_loc, N_IMAGES, user_folders, + pick_max, "board_images_png", + tagging_location, blob_credentials = None, + tag_names=tag_names, + max_tags_per_pixel= 2, + config_class_balance=ideal_class_balance, + colors = ["#e9f1fe", "#f3e9ff"]) + self.assertEqual(filecmp.cmp('Images.json', 'Images_source.json'), True, "generated VOTT json is correct") + + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_init_download_vott_json.py b/test/test_init_download_vott_json.py new file mode 100644 index 00000000..9d15e54e --- /dev/null +++ b/test/test_init_download_vott_json.py @@ -0,0 +1,168 @@ +import unittest +import shutil +import sys +import os +from pathlib import Path +import filecmp +import json +import numpy as np + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config + +tag_dir = str(Path.cwd().parent / "tag") +if tag_dir not in sys.path: + sys.path.append(tag_dir) +from download_vott_json import create_init_vott_json, create_vott_json, get_top_rows, filter_top, add_bkg_class_name, remove_bkg_class_name, parse_class_balance_setting + + +class DownloadInitVOTTJSONTestCase(unittest.TestCase): + def setUp(self): + self.config_file = Config.parse_file("testconfig.ini") + + self.tagging_location = self.config_file["tagging_location"] + "_test" + shutil.rmtree(self.tagging_location, ignore_errors=True) + self.csv_file_loc = Path(self.config_file["tagging_location"]) + + self.csv_file_loc.mkdir(parents=True, exist_ok=True) + self. max_tags_per_pixel = self.config_file.get("max_tags_per_pixel") + self.tag_names = self.config_file["classes"].split(",") + self.user_folders = self.config_file["user_folders"] == "True" + self.pick_max = self.config_file["pick_max"] == "True" + + + + def tearDown(self): + shutil.rmtree(self.tagging_location, ignore_errors=True) + shutil.rmtree("Images", ignore_errors=True) + + shutil.rmtree("test_workdir/camera_images", ignore_errors=True) + shutil.rmtree("test_workdir90", ignore_errors=True) + if os.path.exists(r"test_workdir/camera_images.json"): + os.remove(r"test_workdir/camera_images.json") + + if os.path.exists("totag.csv"): + os.remove("totag.csv") + + if os.path.exists("tagging.csv"): + os.remove("tagging.csv") + if os.path.exists("Images.json"): + os.remove("Images.json") + + if os.path.exists("init_totag.csv"): + os.remove("init_totag.csv") + + print("Tear down") + + + def test_create_vott_json_90(self): + # prepare file + shutil.copyfile("./untagged_cow.csv", "totag.csv") + + csv_file_loc = Path('.') + FOLDER = "camera_images" + N_IMAGES = sum([len(files) for r, d, files in os.walk(FOLDER)]) + user_folders = False + pick_max = True + tagging_location = "." 
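+ # 90 numeric class IDs (1..90, COCO-style): results are written to test_workdir90 and compared with the Images_source_workdir90.json fixture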
+ tagging_location = "test_workdir90" + classesIDs = [str(i) for i in (range(1, 91))] + tag_names = add_bkg_class_name(classesIDs) + + ideal_class_balance = parse_class_balance_setting(None, len(tag_names)) + create_vott_json(csv_file_loc, N_IMAGES, user_folders, + pick_max, FOLDER, + tagging_location, blob_credentials = None, + tag_names= tag_names, + max_tags_per_pixel= 2, + config_class_balance=ideal_class_balance + ) + + + + self.assertEqual(filecmp.cmp(os.path.join(tagging_location, 'Images.json'), 'Images_source_workdir90.json'), True, "generated VOTT json is correct") + + def test_get_filtered(self): + shutil.copyfile("./untagged_cow.csv", "init_totag.csv") + json_fn = "init_classes_map.json" + json_config = None + with open(json_fn, "r") as read_file: + json_config = json.load(read_file) + classmap = json_config["classmap"] + ideal_balance_list = [] + new_tag_names = [] + init_tag_names = [] + class_map_dict = {} + for m in classmap: + ideal_balance_list.append(m['balance']) + new_tag_names.append(m['map']) + init_tag_names.append(m['initclass']) + class_map_dict[m['initclass']] = m['map'] + ideal_balance = ','.join(ideal_balance_list) + unmapclass_list = json_config["unmapclass"] + default_class = json_config["default_class"] + file_location_totag = Path('.')/"init_totag.csv" + new_tag_names = add_bkg_class_name(new_tag_names) + ideal_class_balance = parse_class_balance_setting(ideal_balance, len(new_tag_names)) + + rows, _, _ = get_top_rows(file_location_totag, 10, True, False, + init_tag_names, ideal_class_balance, + filter_top, + unmapclass_list, init_tag_names, class_map_dict, default_class) + + expected_rows = np.load("init_class_get_rows_min.npy") + self.assertEqual((rows == expected_rows).all(), True) + print("") + + def test_create_vott_json(self): + # prepare file + shutil.copyfile("./untagged_cow.csv", "init_totag.csv") + + csv_file_loc = Path('.') + FOLDER = "camera_images" + N_IMAGES = 10 + user_folders = True + pick_max = False + tagging_location = "test_workdir" + + json_fn = "init_classes_map.json" + json_config = None + with open(json_fn, "r") as read_file: + json_config = json.load(read_file) + classmap = json_config["classmap"] + ideal_balance_list = [] + new_tag_names = [] + init_tag_names = [] + class_map_dict = {} + for m in classmap: + ideal_balance_list.append(m['balance']) + new_tag_names.append(m['map']) + init_tag_names.append(m['initclass']) + class_map_dict[m['initclass']] = m['map'] + + unmapclass_list = json_config["unmapclass"] + default_class = json_config["default_class"] + ideal_balance = ','.join(ideal_balance_list) + new_tag_names.append(default_class) + new_tag_names = remove_bkg_class_name(new_tag_names) + ideal_class_balance = parse_class_balance_setting(ideal_balance, len(init_tag_names)) + + create_init_vott_json(csv_file_loc , N_IMAGES, user_folders, + pick_max, + "", #image loc + tagging_location, + None, #blob creds + init_tag_names, + new_tag_names, + 2, #max pix + ideal_class_balance, + ["#e9f1fe", "#33BBFF", "#FFFF19"], #colors + unmapclass_list, init_tag_names, class_map_dict, default_class ) + + self.assertEqual(filecmp.cmp(os.path.join( tagging_location, FOLDER +'.json'), FOLDER + '_source.json'), True, "generated VOTT json is correct") + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_init_tf_detector.py b/test/test_init_tf_detector.py new file mode 100644 index 00000000..4fcf64d4 --- /dev/null +++ b/test/test_init_tf_detector.py @@ -0,0 +1,76 @@ +import unittest +import shutil +import sys +import os +import 
numpy as np +import cv2 +from pathlib import Path +import six.moves.urllib as urllib +import tarfile + + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config + +train_dir = str(Path.cwd().parent / "train") +if train_dir not in sys.path: + sys.path.append(train_dir) +from tf_detector import TFDetector + +TEST_WORKDIR = "test_workdir" + +class TFDetectorTestCase(unittest.TestCase): + def setUp(self): + DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/' + MODEL_NAME = 'faster_rcnn_resnet101_coco_2018_01_28' # 'ssd_mobilenet_v1_coco_2017_11_17' + MODEL_FILE = MODEL_NAME + '.tar.gz' + url = DOWNLOAD_BASE + MODEL_FILE + MODEL_FILE_DST = os.path.join(TEST_WORKDIR, MODEL_FILE) + self.froz_graph = os.path.join(TEST_WORKDIR, MODEL_NAME, "frozen_inference_graph.pb") + if not os.path.exists(self.froz_graph): + if not os.path.exists(MODEL_FILE_DST): + print("Downloading model: ", url) + opener = urllib.request.URLopener() + opener.retrieve(url, MODEL_FILE_DST) + print("Unzipping: ", MODEL_FILE_DST) + tar_file = tarfile.open(MODEL_FILE_DST) + for file in tar_file.getmembers(): + file_name = os.path.basename(file.name) + if 'frozen_inference_graph.pb' in file_name: + tar_file.extract(file, TEST_WORKDIR) + + def tearDown(self): + #shutil.rmtree(self.tagging_location, ignore_errors=True) + print("Tear down") + + def test_predict(self): + classesIDs = list(range(1,91)) + classes = ','.join(str(x) for x in classesIDs ) + detector = TFDetector(classes.split(','),self.froz_graph) + basedir = Path("camera_images") + + all_image_files = list(basedir.rglob("*.JPG")) + image_size = (1000,750) + NUM_CHANNELS = 3 + CV2_COLOR_LOAD_FLAG = 1 + all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) + for curindex, image in enumerate(all_image_files): + all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) + all_predictions = detector.predict(all_images, min_confidence=0.5) + + self.assertEqual(len(all_predictions), len(all_image_files)) + + expected_allpred = np.load('all_predictions_cow.npy') + + self.assertEqual((all_predictions == expected_allpred).all(), True, + "(expected_allpred == all_predictions).all()") + + #np.save('all_predictions_cow', all_predictions) + + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_make_vott_output.py b/test/test_make_vott_output.py new file mode 100644 index 00000000..a4b69af4 --- /dev/null +++ b/test/test_make_vott_output.py @@ -0,0 +1,43 @@ +import unittest +import shutil +import sys +import os +from pathlib import Path +import filecmp +from azure.storage.blob import BlockBlobService + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config + +tag_dir = str(Path.cwd().parent / "tag") +if tag_dir not in sys.path: + sys.path.append(tag_dir) +from download_vott_json import create_vott_json, get_top_rows, add_bkg_class_name, parse_class_balance_setting, make_vott_output + +class MakeVOTTOutputTestCase(unittest.TestCase): + def setUp(self): + print("no-op") + + # Uncomment code below for "ond-demand' VOTT json creaation using data on blob storage + # def test_download_catdata(self): + # #dowload data from tagged_Abram_small + # config_file = Config.parse_file( r'../workconfig.ini') + # + # block_blob_service = 
BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], + # account_key=config_file["AZURE_STORAGE_KEY"]) + # container_name = config_file["image_container_name"] + # file_location = Path('D://temp') + # image_loc = 'D://temp' + # + # file_location_totag = (file_location / "totag.csv") + # create_vott_json(file_location, num_rows=1024, user_folders = True, pick_max = True, image_loc = "", output_location = file_location, + # blob_credentials=(block_blob_service, container_name), + # tag_names=["human","iguana"], max_tags_per_pixel=None, config_class_balance=None, colors=None) + # self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_tf_detector.py b/test/test_tf_detector.py new file mode 100644 index 00000000..388e1146 --- /dev/null +++ b/test/test_tf_detector.py @@ -0,0 +1,57 @@ +import unittest +import shutil +import sys +import os +import numpy as np +import cv2 +from pathlib import Path +import six.moves.urllib as urllib + + +# Allow us to import utils +config_dir = str(Path.cwd().parent / "utils") +if config_dir not in sys.path: + sys.path.append(config_dir) +from config import Config + +train_dir = str(Path.cwd().parent / "train") +if train_dir not in sys.path: + sys.path.append(train_dir) +from tf_detector import TFDetector + +class TFDetectorTestCase(unittest.TestCase): + def setUp(self): + url = "https://olgaliakrepo.blob.core.windows.net/woodknots/model_knots.pb" + model_file = "model_knots.pb" + if not os.path.exists(model_file): + print("Downloading model: ", url) + opener = urllib.request.URLopener() + opener.retrieve(url, model_file) + + def tearDown(self): + #shutil.rmtree(self.tagging_location, ignore_errors=True) + print("Tear down") + + def test_predict(self): + classes = 'knot,defect' + detector = TFDetector(classes.split(','),'model_knots.pb') + + basedir = Path("board_images_png") + N_IMAGES = 4 + all_image_files = list(basedir.rglob("*.png"))[0:N_IMAGES] + image_size = (1000,750) + NUM_CHANNELS = 3 + CV2_COLOR_LOAD_FLAG = 1 + all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) + for curindex, image in enumerate(all_image_files): + all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) + all_predictions = detector.predict(all_images, min_confidence=0.5) + + self.assertEqual(len(all_predictions), N_IMAGES) + + expected_allpred = np.load('all_predictions.npy') + self.assertEqual((all_predictions == expected_allpred).all(), True, "(expected_allpred == all_predictions).all()") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_vott_output.json b/test/test_vott_output.json new file mode 100644 index 00000000..b600dfa1 --- /dev/null +++ b/test/test_vott_output.json @@ -0,0 +1,555 @@ +{ + "frames": { + "1012.png": [ + { + "x1": 39.9155672823219, + "y1": 301.0251256281407, + "x2": 65.66754617414249, + "y2": 328.04020100502515, + "width": 488, + "height": 512, + "box": { + "x1": 39.9155672823219, + "y1": 301.0251256281407, + "x2": 65.66754617414249, + "y2": 328.04020100502515 + }, + "UID": "8d041221", + "id": 0, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 392.0738786279683, + "y1": 346.0502512562814, + "x2": 420.401055408971, + "y2": 373.06532663316585, + "width": 488, + "height": 512, + "box": { + "x1": 392.0738786279683, + "y1": 346.0502512562814, + "x2": 420.401055408971, + "y2": 373.06532663316585 + }, + "UID": "fa3ce672", + "id": 1, + "type": "Rectangle", + "tags": [ + 
"ataggu" + ], + "name": 2 + } + ], + "1013.png": [ + { + "x1": 122.96569920844327, + "y1": 122.21105527638191, + "x2": 171.89445910290237, + "y2": 191.678391959799, + "width": 488, + "height": 512, + "box": { + "x1": 122.96569920844327, + "y1": 122.21105527638191, + "x2": 171.89445910290237, + "y2": 191.678391959799 + }, + "UID": "a197e6f2", + "id": 2, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 65.02374670184696, + "y1": 433.52763819095475, + "x2": 112.66490765171504, + "y2": 459.2562814070352, + "width": 488, + "height": 512, + "box": { + "x1": 65.02374670184696, + "y1": 433.52763819095475, + "x2": 112.66490765171504, + "y2": 459.2562814070352 + }, + "UID": "29fc21aa", + "id": 3, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + }, + { + "x1": 152.58047493403694, + "y1": 437.38693467336685, + "x2": 191.2084432717678, + "y2": 505.5678391959799, + "width": 488, + "height": 512, + "box": { + "x1": 152.58047493403694, + "y1": 437.38693467336685, + "x2": 191.2084432717678, + "y2": 505.5678391959799 + }, + "UID": "b302a42c", + "id": 4, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 3 + }, + { + "x1": 347.00791556728234, + "y1": 393.00502512562815, + "x2": 372.7598944591029, + "y2": 418.73366834170855, + "width": 488, + "height": 512, + "box": { + "x1": 347.00791556728234, + "y1": 393.00502512562815, + "x2": 372.7598944591029, + "y2": 418.73366834170855 + }, + "UID": "d75d3848", + "id": 5, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 4 + }, + { + "x1": 374.0474934036939, + "y1": 439.95979899497485, + "x2": 399.79947229551453, + "y2": 465.6884422110553, + "width": 488, + "height": 512, + "box": { + "x1": 374.0474934036939, + "y1": 439.95979899497485, + "x2": 399.79947229551453, + "y2": 465.6884422110553 + }, + "UID": "0cf73292", + "id": 6, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 5 + } + ], + "1014.png": [ + { + "x1": 216.9604221635884, + "y1": 203.25628140703517, + "x2": 242.71240105540898, + "y2": 230.2713567839196, + "width": 488, + "height": 512, + "box": { + "x1": 216.9604221635884, + "y1": 203.25628140703517, + "x2": 242.71240105540898, + "y2": 230.2713567839196 + }, + "UID": "2e9f323c", + "id": 7, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 300.65435356200527, + "y1": 364.0603015075377, + "x2": 326.4063324538259, + "y2": 389.7889447236181, + "width": 488, + "height": 512, + "box": { + "x1": 300.65435356200527, + "y1": 364.0603015075377, + "x2": 326.4063324538259, + "y2": 389.7889447236181 + }, + "UID": "8bf4dd37", + "id": 8, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + } + ], + "1015.png": [ + { + "x1": 300.65435356200527, + "y1": 300.3819095477387, + "x2": 326.4063324538259, + "y2": 326.1105527638191, + "width": 488, + "height": 512, + "box": { + "x1": 300.65435356200527, + "y1": 300.3819095477387, + "x2": 326.4063324538259, + "y2": 326.1105527638191 + }, + "UID": "80c5ac63", + "id": 9, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 226.6174142480211, + "y1": 402.6532663316583, + "x2": 252.3693931398417, + "y2": 429.6683417085427, + "width": 488, + "height": 512, + "box": { + "x1": 226.6174142480211, + "y1": 402.6532663316583, + "x2": 252.3693931398417, + "y2": 429.6683417085427 + }, + "UID": "b559e9b2", + "id": 10, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + } + ], + "1016.png": [ + { + "x1": 156.44327176781002, + "y1": 236.7035175879397, + "x2": 204.0844327176781, + "y2": 
307.4572864321608, + "width": 488, + "height": 512, + "box": { + "x1": 156.44327176781002, + "y1": 236.7035175879397, + "x2": 204.0844327176781, + "y2": 307.4572864321608 + }, + "UID": "5368d1fa", + "id": 11, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 375.9788918205805, + "y1": 281.7286432160804, + "x2": 401.73087071240104, + "y2": 307.4572864321608, + "width": 488, + "height": 512, + "box": { + "x1": 375.9788918205805, + "y1": 281.7286432160804, + "x2": 401.73087071240104, + "y2": 307.4572864321608 + }, + "UID": "a603a804", + "id": 12, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + }, + { + "x1": 383.70448548812664, + "y1": 461.8291457286432, + "x2": 409.45646437994725, + "y2": 487.5577889447236, + "width": 488, + "height": 512, + "box": { + "x1": 383.70448548812664, + "y1": 461.8291457286432, + "x2": 409.45646437994725, + "y2": 487.5577889447236 + }, + "UID": "e953b706", + "id": 13, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 3 + }, + { + "x1": 146.1424802110818, + "y1": 441.2462311557789, + "x2": 188.63324538258576, + "y2": 510.713567839196, + "width": 488, + "height": 512, + "box": { + "x1": 146.1424802110818, + "y1": 441.2462311557789, + "x2": 188.63324538258576, + "y2": 510.713567839196 + }, + "UID": "29a0b420", + "id": 14, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 4 + }, + { + "x1": 382.4168865435356, + "y1": 419.37688442211055, + "x2": 408.1688654353562, + "y2": 445.10552763819095, + "width": 488, + "height": 512, + "box": { + "x1": 382.4168865435356, + "y1": 419.37688442211055, + "x2": 408.1688654353562, + "y2": 445.10552763819095 + }, + "UID": "73e1a76e", + "id": 15, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 5 + } + ], + "1017.png": [ + { + "x1": 78.54353562005277, + "y1": 356.34170854271355, + "x2": 104.29551451187335, + "y2": 385.92964824120605, + "width": 488, + "height": 512, + "box": { + "x1": 78.54353562005277, + "y1": 356.34170854271355, + "x2": 104.29551451187335, + "y2": 385.92964824120605 + }, + "UID": "da911bba", + "id": 17, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 433.9208443271768, + "y1": 319.035175879397, + "x2": 459.67282321899734, + "y2": 344.7638190954774, + "width": 488, + "height": 512, + "box": { + "x1": 433.9208443271768, + "y1": 319.035175879397, + "x2": 459.67282321899734, + "y2": 344.7638190954774 + }, + "UID": "74bd9c6d", + "id": 18, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + }, + { + "x1": 50.21635883905013, + "y1": 155.65829145728642, + "x2": 75.96833773087072, + "y2": 181.38693467336682, + "width": 488, + "height": 512, + "box": { + "x1": 50.21635883905013, + "y1": 155.65829145728642, + "x2": 75.96833773087072, + "y2": 181.38693467336682 + }, + "UID": "ee2edadf", + "id": 19, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 3 + }, + { + "x1": 49.57255936675462, + "y1": 221.26633165829145, + "x2": 75.3245382585752, + "y2": 246.99497487437185, + "width": 488, + "height": 512, + "box": { + "x1": 49.57255936675462, + "y1": 221.26633165829145, + "x2": 75.3245382585752, + "y2": 246.99497487437185 + }, + "UID": "ffe26cb3", + "id": 20, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 4 + } + ], + "1018.png": [ + { + "x1": 74.03693931398416, + "y1": 301.6683417085427, + "x2": 99.78891820580475, + "y2": 327.3969849246231, + "width": 488, + "height": 512, + "box": { + "x1": 74.03693931398416, + "y1": 301.6683417085427, + "x2": 99.78891820580475, + "y2": 327.3969849246231 
+ }, + "UID": "db0af2df", + "id": 16, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + } + ], + "1019.png": [], + "1020.png": [], + "1021.png": [], + "1022.png": [], + "1023.png": [], + "1024.png": [ + { + "x1": 318.6807387862797, + "y1": 248.28140703517587, + "x2": 344.4327176781003, + "y2": 274.0100502512563, + "width": 488, + "height": 512, + "box": { + "x1": 318.6807387862797, + "y1": 248.28140703517587, + "x2": 344.4327176781003, + "y2": 274.0100502512563 + }, + "UID": "7ee346bf", + "id": 21, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 1 + }, + { + "x1": 0, + "y1": 272.08040201005025, + "x2": 224.68601583113457, + "y2": 297.80904522613065, + "width": 488, + "height": 512, + "box": { + "x1": 0, + "y1": 272.08040201005025, + "x2": 224.68601583113457, + "y2": 297.80904522613065 + }, + "UID": "ab7e3233", + "id": 22, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 2 + }, + { + "x1": 0, + "y1": 75.89949748743719, + "x2": 85.62532981530343, + "y2": 486.2713567839196, + "width": 488, + "height": 512, + "box": { + "x1": 0, + "y1": 75.89949748743719, + "x2": 85.62532981530343, + "y2": 486.2713567839196 + }, + "UID": "392e1696", + "id": 23, + "type": "Rectangle", + "tags": [ + "ataggu" + ], + "name": 3 + } + ], + "1025.png": [], + "1026.png": [], + "1027.png": [], + "1028.png": [] + }, + "framerate": "1", + "inputTags": "ataggu", + "suggestiontype": "track", + "scd": false, + "visitedFrames": [ + "1012.png", + "1013.png", + "1014.png", + "1015.png", + "1016.png", + "1017.png", + "1018.png", + "1019.png", + "1020.png", + "1021.png", + "1022.png", + "1023.png", + "1024.png" + ], + "tag_colors": [ + "#0fded8" + ] +} \ No newline at end of file diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG new file mode 100644 index 00000000..7f3f37eb Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG new file mode 100644 index 00000000..111c1e48 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG new file mode 100644 index 00000000..8210bdcc Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG new file mode 100644 index 00000000..189fb420 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG new file mode 100644 index 00000000..a9af7abf Binary files /dev/null and 
b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG new file mode 100644 index 00000000..a2e7fd9c Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG new file mode 100644 index 00000000..6c843ee3 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG new file mode 100644 index 00000000..b64c2264 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG new file mode 100644 index 00000000..9aa21a39 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG differ diff --git a/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG new file mode 100644 index 00000000..3ebd2697 Binary files /dev/null and b/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG differ diff --git a/test/test_workdir_train/board_images_png/st1026.png b/test/test_workdir_train/board_images_png/st1026.png new file mode 100644 index 00000000..a762f1b5 Binary files /dev/null and b/test/test_workdir_train/board_images_png/st1026.png differ diff --git a/test/test_workdir_train/board_images_png/st1194.png b/test/test_workdir_train/board_images_png/st1194.png new file mode 100644 index 00000000..a5ea2ff7 Binary files /dev/null and b/test/test_workdir_train/board_images_png/st1194.png differ diff --git a/test/test_workdir_train/board_images_png/st1578.png b/test/test_workdir_train/board_images_png/st1578.png new file mode 100644 index 00000000..51407df8 Binary files /dev/null and b/test/test_workdir_train/board_images_png/st1578.png differ diff --git a/test/test_workdir_train/board_images_png/st1611.png b/test/test_workdir_train/board_images_png/st1611.png new file mode 100644 index 00000000..be7e3ec1 Binary files /dev/null and b/test/test_workdir_train/board_images_png/st1611.png differ diff --git a/test/testconfig.ini b/test/testconfig.ini new file mode 100644 index 00000000..118f8f8d --- /dev/null +++ b/test/testconfig.ini @@ -0,0 +1,62 @@ +# AZURE STORAGE ACCOUNT INFORMATION +AZURE_STORAGE_ACCOUNT=test +AZURE_STORAGE_KEY=test== +image_container_name=testimages +label_container_name=testabels +# IMAGE INFORMATION +user_folders=True +classes=knot,defect +ideal_class_balance=0.7,0.3,0 +filetype=*.png +# TAGGING MACHINE +tagging_location=D:\\temp\\test +pick_max=True +max_tags_per_pixel=2 +# CUSTOM VISION +#training_key= +#prediction_key= +#project_id= +# 
TRAINING MACHINE +# Locations +python_file_directory=/home/olgali/repos/active-learning-detect/train +data_dir=/home/olgali/ActiveLearning/Data_cat +train_dir=/home/olgali/ActiveLearning/training_cat +inference_output_dir=cat_inference_graphs +tf_models_location=/home/olgali/repos/tf_model/models/research +download_location=/home/olgali/downloads_cat +# Training +train_iterations=2000 +eval_iterations=10 +min_confidence=.5 +test_percentage=.2 +model_name=faster_rcnn_resnet50_coco_2018_01_28 +optional_pipeline_url=https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/samples/configs/faster_rcnn_resnet50_pets.config +#Init Predictions +init_model_name=faster_rcnn_resnet101_coco_2018_01_28 +# Config File Details +old_label_path=PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt +old_train_path=PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010 +old_val_path=PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-00010 +old_checkpoint_path=PATH_TO_BE_CONFIGURED/model.ckpt +num_examples_marker=num_examples: +num_steps_marker=num_steps: +num_classes_marker=num_classes: +# Calculated +num_classes="$(awk -F ',' '{print NF}' <<< ${classes})" +image_dir=${data_dir}/AllImages +untagged_output=${data_dir}/untagged.csv +tagged_output=${data_dir}/tagged.csv +tagged_predictions=${data_dir}/tagged_preds.csv +test_output=${data_dir}/test.csv +validation_output=${data_dir}/val.csv +tf_location_legacy=${tf_models_location}/object_detection/legacy +tf_location=${tf_models_location}/object_detection +PYTHONPATH=$PYTHONPATH:${tf_models_location}:${tf_models_location}/slim/ +label_map_path=${data_dir}/pascal_label_map.pbtxt +tf_record_location=${data_dir}/stamps.record +tf_train_record=${tf_record_location%.*}_train.${tf_record_location##*.} +tf_val_record=${tf_record_location%.*}_val.${tf_record_location##*.} +tf_url=http://download.tensorflow.org/models/object_detection/${model_name}.tar.gz +pipeline_file=${download_location}/${model_name}/pipeline.config +fine_tune_checkpoint=${download_location}/${model_name}/model.ckpt +init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb diff --git a/test/totag_no_folder_source.csv b/test/totag_no_folder_source.csv new file mode 100644 index 00000000..c57386c7 --- /dev/null +++ b/test/totag_no_folder_source.csv @@ -0,0 +1,564 @@ +filename,class,xmin,xmax,ymin,ymax,height,width,box_confidence,image_confidence +st1425.png,NULL,0,0,0,0,512,488,0,0 +st1507.png,knot,0.30267757,0.36275476,0.30722874,0.3859462,512,488,0.96844035,0.96844035 +st1658.png,NULL,0,0,0,0,512,488,0,0 +st1280.png,NULL,0,0,0,0,512,488,0,0 +st1574.png,knot,0.1320003,0.17829065,0.35484976,0.41822958,512,488,0.9887079,0.926775 +st1574.png,knot,0.8644475,0.9901977,0.19784021,0.3901529,512,488,0.9740087,0.926775 +st1574.png,knot,0.94484687,1,0.710199,0.8731014,512,488,0.926775,0.926775 +st1550.png,knot,0.12705852,0.25362155,0.29243648,0.4436928,512,488,0.97203135,0.97203135 +st1532.png,NULL,0,0,0,0,512,488,0,0 +st1300.png,NULL,0,0,0,0,512,488,0,0 +st1032.png,defect,0.42934287,0.7599493,0.5304596,0.5905363,512,488,0.96906316,0.72713023 +st1032.png,defect,0.2200782,0.27667534,0.18268493,0.53899956,512,488,0.72713023,0.72713023 +st1758.png,NULL,0,0,0,0,512,488,0,0 +st1370.png,NULL,0,0,0,0,512,488,0,0 +st1715.png,knot,0.5078927,0.58547044,0.3985074,0.47241923,512,488,0.8088237,0.8088237 +st1614.png,knot,0.58140886,0.77096295,0.76956195,0.9438482,512,488,0.89606684,0.89606684 
+st1346.png,knot,0.32081026,0.38056508,0.31275535,0.38130763,512,488,0.9854725,0.9689765 +st1346.png,knot,0.8616573,0.92554694,0.37297934,0.4253001,512,488,0.9689765,0.9689765 +st1558.png,defect,0.3651751,0.6520672,0.3140911,0.37667334,512,488,0.752485,0.752485 +st1022.png,NULL,0,0,0,0,512,488,0,0 +st1067.png,knot,0.36342445,0.40953362,0.4274848,0.5127849,512,488,0.98211503,0.8958246 +st1067.png,defect,0.61802673,0.99210936,0.5602096,0.6185947,512,488,0.96133125,0.8958246 +st1067.png,knot,0.29145327,0.34137663,0.4940968,0.5431678,512,488,0.8958246,0.8958246 +st1479.png,knot,0.12793025,0.2188721,0.3734488,0.51437014,512,488,0.9717827,0.88874847 +st1479.png,knot,0.7373435,0.7909296,0.40912244,0.44390976,512,488,0.88874847,0.88874847 +st1368.png,NULL,0,0,0,0,512,488,0,0 +st1154.png,NULL,0,0,0,0,512,488,0,0 +st1760.png,NULL,0,0,0,0,512,488,0,0 +st1780.png,NULL,0,0,0,0,512,488,0,0 +st1395.png,NULL,0,0,0,0,512,488,0,0 +st1567.png,knot,0.19071558,0.247627,0.43410447,0.4907445,512,488,0.89100105,0.89100105 +st1626.png,knot,0.87445205,0.9559347,0.9241024,0.99402934,512,488,0.9514488,0.8882066 +st1626.png,knot,0.11829056,0.22432978,0.63148624,0.80027825,512,488,0.8882066,0.8882066 +st1383.png,NULL,0,0,0,0,512,488,0,0 +st1386.png,NULL,0,0,0,0,512,488,0,0 +st1340.png,NULL,0,0,0,0,512,488,0,0 +st1185.png,knot,0.6978268,0.7582275,0.66821593,0.7535644,512,488,0.97257924,0.7035888 +st1185.png,defect,0.35780182,0.60781866,0.27580062,0.32093963,512,488,0.9720861,0.7035888 +st1185.png,knot,0.5183983,0.57071316,0.84764653,0.91617334,512,488,0.9241496,0.7035888 +st1185.png,knot,0.55567926,0.5904746,0.51832056,0.5461106,512,488,0.7035888,0.7035888 +st1174.png,knot,0.22525154,0.42907882,0.69679314,0.9117324,512,488,0.9682847,0.9682847 +st1228.png,knot,0.13831148,0.18676595,0.5025294,0.57187015,512,488,0.9633739,0.9633739 +st1211.png,knot,0.33376285,0.3789052,0.49142396,0.5399416,512,488,0.99031806,0.97394437 +st1211.png,knot,0.61613625,0.6627939,0.5496773,0.6076902,512,488,0.9841182,0.97394437 +st1211.png,knot,0.5185557,0.563689,0.800335,0.838637,512,488,0.9770195,0.97394437 +st1211.png,knot,0.29164347,0.32961696,0.76875347,0.80985427,512,488,0.97394437,0.97394437 +st1409.png,knot,0.50456727,0.55044687,0.43021923,0.5008391,512,488,0.98137134,0.8897141 +st1409.png,knot,0.55252427,0.5947376,0.59299654,0.6804634,512,488,0.90360755,0.8897141 +st1409.png,defect,0.02457425,0.1723926,0.56370574,0.62124217,512,488,0.8897141,0.8897141 +st1811.png,knot,0.06933522,0.12140793,0.56051254,0.6089163,512,488,0.88610995,0.88610995 +st1219.png,knot,0.27650398,0.42174247,0.3864263,0.5609011,512,488,0.969893,0.7928644 +st1219.png,knot,0.88806176,0.9325516,0.5853674,0.6220557,512,488,0.7928644,0.7928644 +st1731.png,NULL,0,0,0,0,512,488,0,0 +st1412.png,knot,0.34901053,0.4057406,0.52082306,0.58607656,512,488,0.994964,0.80384046 +st1412.png,knot,0.88786006,0.93988264,0.86172235,0.9108845,512,488,0.95610106,0.80384046 +st1412.png,knot,0.8434941,0.87894285,0.5202804,0.560757,512,488,0.80384046,0.80384046 +st1418.png,knot,0.12883854,0.19043617,0.6020754,0.65327597,512,488,0.97144216,0.97144216 +st1563.png,knot,0.38526446,0.44112217,0.66772294,0.7896756,512,488,0.87763524,0.87763524 +st1746.png,knot,0.8832216,0.93555427,0.42813456,0.48943433,512,488,0.91031706,0.91031706 +st1526.png,defect,0.3677499,0.9344856,0.52128524,0.5916721,512,488,0.77248275,0.77248275 +st1054.png,NULL,0,0,0,0,512,488,0,0 +st1133.png,NULL,0,0,0,0,512,488,0,0 +st1132.png,NULL,0,0,0,0,512,488,0,0 
+st1156.png,knot,0.74468595,0.7872824,0.6505758,0.7120241,512,488,0.9787052,0.8870228 +st1156.png,knot,0.016042642,0.1036161,0.624759,0.74122274,512,488,0.9766431,0.8870228 +st1156.png,knot,0.84342957,0.8916702,0.29592016,0.3381421,512,488,0.8870228,0.8870228 +st1298.png,knot,0.4514519,0.5046197,0.77640456,0.81230736,512,488,0.8977459,0.8977459 +st1474.png,knot,0.005094356,0.0965749,0.73857075,0.857727,512,488,0.94921297,0.94921297 +st1285.png,knot,0.120333195,0.16538988,0.42999956,0.4713941,512,488,0.9724019,0.8775609 +st1285.png,knot,0.7336784,0.8959469,0.42097697,0.59948033,512,488,0.8775609,0.8775609 +st1256.png,NULL,0,0,0,0,512,488,0,0 +st1540.png,NULL,0,0,0,0,512,488,0,0 +st1850.png,NULL,0,0,0,0,512,488,0,0 +st1546.png,NULL,0,0,0,0,512,488,0,0 +st1737.png,NULL,0,0,0,0,512,488,0,0 +st1374.png,knot,0.007972765,0.14858767,0.3385822,0.53610593,512,488,0.9833903,0.9833903 +st1583.png,knot,0.17383476,0.2230064,0.4013973,0.4348171,512,488,0.80748755,0.80748755 +st1652.png,NULL,0,0,0,0,512,488,0,0 +st1233.png,NULL,0,0,0,0,512,488,0,0 +st1264.png,NULL,0,0,0,0,512,488,0,0 +st1106.png,NULL,0,0,0,0,512,488,0,0 +st1253.png,NULL,0,0,0,0,512,488,0,0 +st1752.png,NULL,0,0,0,0,512,488,0,0 +st1817.png,NULL,0,0,0,0,512,488,0,0 +st1158.png,knot,0.5864669,0.709107,0.57217103,0.70062774,512,488,0.8354658,0.8354658 +st1591.png,knot,0.55157113,0.59348816,0.3838704,0.42843914,512,488,0.9774615,0.9774615 +st1019.png,NULL,0,0,0,0,512,488,0,0 +st1756.png,NULL,0,0,0,0,512,488,0,0 +st1814.png,NULL,0,0,0,0,512,488,0,0 +st1411.png,defect,0.4562817,0.74260026,0.41583094,0.47507042,512,488,0.9707597,0.7683885 +st1411.png,defect,0.33153504,0.38562804,0.54801345,0.90057915,512,488,0.7683885,0.7683885 +st1437.png,knot,0.87057835,0.93585074,0.42636645,0.47993812,512,488,0.99364483,0.90721714 +st1437.png,knot,0.38025564,0.4292948,0.8327625,0.875494,512,488,0.9815393,0.90721714 +st1437.png,knot,0.86802167,0.90930617,0.6626619,0.7106249,512,488,0.9466927,0.90721714 +st1437.png,knot,0.30429596,0.345514,0.46304542,0.49877295,512,488,0.9149384,0.90721714 +st1437.png,knot,0.44645458,0.48949313,0.5727815,0.6101881,512,488,0.90721714,0.90721714 +st1312.png,knot,0.9355495,0.9885366,0.55922663,0.6121081,512,488,0.97851986,0.97851986 +st1246.png,NULL,0,0,0,0,512,488,0,0 +st1714.png,NULL,0,0,0,0,512,488,0,0 +st1618.png,knot,0.64436,0.68662965,0.34913963,0.39694545,512,488,0.9157832,0.8893132 +st1618.png,knot,0.21418211,0.248939,0.40383893,0.45200813,512,488,0.8893132,0.8893132 +st1804.png,defect,0.6402425,0.8739144,0.7831069,0.8384883,512,488,0.96215266,0.7692557 +st1804.png,defect,0.4554133,0.6682096,0.88511723,0.9596481,512,488,0.9468119,0.7692557 +st1804.png,knot,0.69052243,0.7642171,0.316173,0.45031247,512,488,0.7692557,0.7692557 +st1419.png,NULL,0,0,0,0,512,488,0,0 +st1489.png,NULL,0,0,0,0,512,488,0,0 +st1160.png,knot,0.13819394,0.19427188,0.35991684,0.4188612,512,488,0.99321955,0.88587844 +st1160.png,knot,0.93188983,0.9892542,0.89026076,0.93819654,512,488,0.88587844,0.88587844 +st1217.png,knot,0.5789819,0.7600379,0.6990706,0.92517173,512,488,0.965711,0.965711 +st1735.png,NULL,0,0,0,0,512,488,0,0 +st1482.png,knot,0.7225375,0.760401,0.39166692,0.4188348,512,488,0.7819961,0.7819961 +st1844.png,knot,0.2735705,0.40624326,0.20760712,0.42487168,512,488,0.98871547,0.93331075 +st1844.png,knot,0.9521196,0.995074,0.34867543,0.4020578,512,488,0.93331075,0.93331075 +st1488.png,NULL,0,0,0,0,512,488,0,0 +st1382.png,NULL,0,0,0,0,512,488,0,0 +st1529.png,knot,0.9444826,0.9944176,0.2133748,0.26350862,512,488,0.96461403,0.96461403 
+st1427.png,NULL,0,0,0,0,512,488,0,0 +st1356.png,NULL,0,0,0,0,512,488,0,0 +st1341.png,NULL,0,0,0,0,512,488,0,0 +st1384.png,NULL,0,0,0,0,512,488,0,0 +st1273.png,NULL,0,0,0,0,512,488,0,0 +st1480.png,NULL,0,0,0,0,512,488,0,0 +st1240.png,NULL,0,0,0,0,512,488,0,0 +st1147.png,knot,0.7235606,0.83558035,0.27972806,0.40932456,512,488,0.91702497,0.728294 +st1147.png,knot,0.7145754,0.90363944,0.28619885,0.41697603,512,488,0.728294,0.728294 +st1581.png,knot,0.8395566,0.90542674,0.66599536,0.75553393,512,488,0.9763974,0.9763974 +st1360.png,knot,0.87198615,0.9385322,0.47307807,0.55335397,512,488,0.98329365,0.98329365 +st1398.png,NULL,0,0,0,0,512,488,0,0 +st1827.png,NULL,0,0,0,0,512,488,0,0 +st1028.png,NULL,0,0,0,0,512,488,0,0 +st1504.png,knot,0.7023221,0.7615038,0.34336826,0.42820725,512,488,0.95297855,0.89665526 +st1504.png,knot,0.6145234,0.7415803,0.8845993,1,512,488,0.9271189,0.89665526 +st1504.png,defect,0.002148581,0.27988404,0.5505484,0.59185165,512,488,0.89665526,0.89665526 +st1578.png,knot,0.594302,0.6663906,0.35276932,0.43525606,512,488,0.98448783,0.98448783 +st1036.png,knot,0.028068142,0.0839983,0.806439,0.85608625,512,488,0.9128011,0.9016027 +st1036.png,knot,0.42376837,0.5876269,0.52140605,0.7059735,512,488,0.9016027,0.9016027 +st1499.png,knot,0.5809746,0.6119887,0.4228858,0.47573146,512,488,0.8952005,0.7930336 +st1499.png,knot,0.7553885,0.79633987,0.7965046,0.83087444,512,488,0.7930336,0.7930336 +st1090.png,knot,0.5578696,0.7180711,0.30727062,0.53313506,512,488,0.94313323,0.81639516 +st1090.png,defect,0.73136926,0.8486861,0.6013503,0.6512936,512,488,0.81639516,0.81639516 +st1024.png,defect,0.112691626,0.44226354,0.5310191,0.5835047,512,488,0.87372416,0.7271544 +st1024.png,defect,0.40821457,0.9742286,0.52778643,0.58860064,512,488,0.7271544,0.7271544 +st1601.png,NULL,0,0,0,0,512,488,0,0 +st1730.png,NULL,0,0,0,0,512,488,0,0 +st1328.png,NULL,0,0,0,0,512,488,0,0 +st1404.png,NULL,0,0,0,0,512,488,0,0 +st1234.png,NULL,0,0,0,0,512,488,0,0 +st1130.png,NULL,0,0,0,0,512,488,0,0 +st1432.png,knot,0.2931829,0.3458625,0.56240505,0.60282475,512,488,0.98519933,0.874988 +st1432.png,knot,0.29956195,0.36768192,0.86441845,0.962151,512,488,0.97217214,0.874988 +st1432.png,knot,0.16986336,0.20972233,0.73092186,0.7836456,512,488,0.9664343,0.874988 +st1432.png,knot,0.777344,0.8248272,0.59483975,0.6455976,512,488,0.874988,0.874988 +st1701.png,NULL,0,0,0,0,512,488,0,0 +st1123.png,knot,0.2767068,0.32175153,0.46701193,0.506633,512,488,0.97385967,0.9535659 +st1123.png,knot,0.7109091,0.7986061,0.6256758,0.7137536,512,488,0.9535659,0.9535659 +st1283.png,knot,0.098313406,0.1568827,0.64341545,0.6876629,512,488,0.9695637,0.92305833 +st1283.png,knot,0.19095805,0.23581375,0.21043442,0.2393901,512,488,0.9597842,0.92305833 +st1283.png,knot,0.78956836,0.9321685,0.43591335,0.615597,512,488,0.92305833,0.92305833 +st1566.png,knot,0.20809694,0.25938302,0.8333139,0.881473,512,488,0.9220271,0.9220271 +st1184.png,knot,0.5389529,0.5887932,0.88446933,0.94065404,512,488,0.98569345,0.9756015 +st1184.png,knot,0.7372067,0.7950743,0.43428457,0.5021385,512,488,0.9756015,0.9756015 +st1325.png,knot,0.41096783,0.4686864,0.3719188,0.4173787,512,488,0.97178966,0.71758956 +st1325.png,defect,0.45774087,0.57515985,0.47131798,0.507235,512,488,0.71758956,0.71758956 +st1486.png,NULL,0,0,0,0,512,488,0,0 +st1606.png,NULL,0,0,0,0,512,488,0,0 +st1704.png,NULL,0,0,0,0,512,488,0,0 +st1134.png,NULL,0,0,0,0,512,488,0,0 +st1517.png,knot,0.74200016,0.81116307,0.43382335,0.50637746,512,488,0.8954352,0.8954352 
+st1225.png,knot,0.2011443,0.41020003,0.23063861,0.3502846,512,488,0.9019703,0.9019703 +st1342.png,NULL,0,0,0,0,512,488,0,0 +st1294.png,knot,0.5556125,0.6112592,0.3199844,0.37132874,512,488,0.9663085,0.9663085 +st1099.png,knot,0.38057646,0.43379363,0.64525,0.69284534,512,488,0.97903275,0.94032264 +st1099.png,knot,0.783422,0.8516338,0.75065935,0.8024257,512,488,0.94032264,0.94032264 +st1485.png,knot,0.5084776,0.5605646,0.29666436,0.3375187,512,488,0.9734903,0.9734903 +st1778.png,knot,0.33641994,0.40676555,0.5519531,0.6495054,512,488,0.9951767,0.92300224 +st1778.png,knot,0.82943386,0.95425886,0.8764082,0.9973693,512,488,0.92300224,0.92300224 +st1255.png,NULL,0,0,0,0,512,488,0,0 +st1282.png,knot,0.7397333,0.8512135,0.60256326,0.795918,512,488,0.97338796,0.9186734 +st1282.png,knot,0.000657597,0.21586742,0.19925785,0.47003567,512,488,0.9186734,0.9186734 +st1492.png,NULL,0,0,0,0,512,488,0,0 +st1502.png,knot,0.38450775,0.48494667,0.46433184,0.57277703,512,488,0.7445761,0.7445761 +st1301.png,NULL,0,0,0,0,512,488,0,0 +st1221.png,knot,0.82906234,0.87590575,0.29823613,0.34273428,512,488,0.9677968,0.89999956 +st1221.png,knot,0.5514687,0.67128026,0.23506156,0.39411125,512,488,0.94143856,0.89999956 +st1221.png,knot,0.16929382,0.3767521,0.82198226,0.967369,512,488,0.89999956,0.89999956 +st1313.png,NULL,0,0,0,0,512,488,0,0 +st1725.png,NULL,0,0,0,0,512,488,0,0 +st1501.png,defect,0.028466891,0.43053594,0.56010455,0.61603063,512,488,0.788139,0.788139 +st1469.png,NULL,0,0,0,0,512,488,0,0 +st1030.png,NULL,0,0,0,0,512,488,0,0 +st1754.png,NULL,0,0,0,0,512,488,0,0 +st1224.png,NULL,0,0,0,0,512,488,0,0 +st1846.png,NULL,0,0,0,0,512,488,0,0 +st1345.png,knot,0.6995347,0.75696117,0.5753143,0.6314817,512,488,0.97813386,0.97813386 +st1691.png,NULL,0,0,0,0,512,488,0,0 +st1049.png,NULL,0,0,0,0,512,488,0,0 +st1407.png,knot,0.11778457,0.16466995,0.70767486,0.7456024,512,488,0.9189501,0.8215379 +st1407.png,defect,0.18611541,0.62973213,0.24472722,0.523277,512,488,0.8215379,0.8215379 +st1761.png,knot,0.090693764,0.13790727,0.6114098,0.69016904,512,488,0.98304546,0.98304546 +st1434.png,knot,0.23485328,0.28222427,0.55493677,0.59290344,512,488,0.92389,0.74063563 +st1434.png,defect,0.84828746,0.8820617,0.6607343,0.8046339,512,488,0.74063563,0.74063563 +st1338.png,NULL,0,0,0,0,512,488,0,0 +st1748.png,NULL,0,0,0,0,512,488,0,0 +st1372.png,NULL,0,0,0,0,512,488,0,0 +st1738.png,NULL,0,0,0,0,512,488,0,0 +st1505.png,NULL,0,0,0,0,512,488,0,0 +st1236.png,NULL,0,0,0,0,512,488,0,0 +st1630.png,NULL,0,0,0,0,512,488,0,0 +st1794.png,NULL,0,0,0,0,512,488,0,0 +st1114.png,knot,0.9400154,0.9931325,0.5959491,0.6464846,512,488,0.89500606,0.89500606 +st1172.png,knot,0.07152845,0.12519489,0.88483614,0.9465262,512,488,0.9098522,0.9098522 +st1131.png,NULL,0,0,0,0,512,488,0,0 +st1193.png,NULL,0,0,0,0,512,488,0,0 +st1514.png,defect,0.12568697,0.69539475,0.5725874,0.62920445,512,488,0.8348586,0.8212201 +st1514.png,knot,0.9272249,0.9948062,0.6711885,0.8692283,512,488,0.8212201,0.8212201 +st1139.png,knot,0.56677705,0.64111996,0.6869818,0.77499914,512,488,0.9921335,0.9774146 +st1139.png,knot,0.29908285,0.39024782,0.93987906,1,512,488,0.9774146,0.9774146 +st1020.png,NULL,0,0,0,0,512,488,0,0 +st1153.png,NULL,0,0,0,0,512,488,0,0 +st1275.png,NULL,0,0,0,0,512,488,0,0 +st1320.png,NULL,0,0,0,0,512,488,0,0 +st1451.png,knot,0.37755623,0.45656112,0.65385526,0.74539596,512,488,0.8936781,0.8936781 +st1111.png,knot,0.69098425,0.7365354,0.4256106,0.4817522,512,488,0.9783952,0.97570944 +st1111.png,knot,0.11195667,0.16339019,0.43002528,0.47924435,512,488,0.97570944,0.97570944 
+st1497.png,knot,0.14220633,0.20099886,0.28387493,0.35495216,512,488,0.9926939,0.94570374 +st1497.png,knot,0.13923028,0.1923464,0.7384727,0.81842715,512,488,0.99130225,0.94570374 +st1497.png,knot,0.6346491,0.67967534,0.5156564,0.5566579,512,488,0.94570374,0.94570374 +st1510.png,knot,0.8977143,0.9465768,0.47015342,0.51805454,512,488,0.94509804,0.94509804 +st1829.png,NULL,0,0,0,0,512,488,0,0 +st1091.png,knot,0.20989896,0.251748,0.34986168,0.3921352,512,488,0.99201256,0.70161 +st1091.png,knot,0.696119,0.7461088,0.27078417,0.33086362,512,488,0.9827361,0.70161 +st1091.png,knot,0.89531857,0.93743694,0.4605299,0.5066802,512,488,0.9794672,0.70161 +st1091.png,defect,0.7629506,1,0.6205898,0.67307687,512,488,0.74762243,0.70161 +st1091.png,knot,0.14214082,0.247842,0.7355515,0.8967391,512,488,0.7072498,0.70161 +st1091.png,defect,0,0.1281265,0.55038965,0.59755194,512,488,0.70161,0.70161 +st1475.png,NULL,0,0,0,0,512,488,0,0 +st1358.png,NULL,0,0,0,0,512,488,0,0 +st1215.png,knot,0.19370292,0.24040401,0.679389,0.7466115,512,488,0.98310626,0.9192501 +st1215.png,knot,0.4819632,0.5332109,0.61278045,0.6552351,512,488,0.98147905,0.9192501 +st1215.png,knot,0.51983714,0.5659016,0.83619606,0.8828397,512,488,0.9192501,0.9192501 +st1670.png,NULL,0,0,0,0,512,488,0,0 +st1428.png,NULL,0,0,0,0,512,488,0,0 +st1549.png,knot,0.63825864,0.7578076,0.60389596,0.76869655,512,488,0.9821302,0.94119686 +st1549.png,knot,0.20611124,0.34173003,0.42097017,0.57046175,512,488,0.94119686,0.94119686 +st1274.png,knot,0.48306307,0.52941614,0.33974242,0.37156188,512,488,0.89189124,0.89189124 +st1732.png,knot,0.7792275,0.8772611,0.27241305,0.33139244,512,488,0.82892925,0.82892925 +st1728.png,NULL,0,0,0,0,512,488,0,0 +st1490.png,NULL,0,0,0,0,512,488,0,0 +st1511.png,knot,0.8874813,0.9360236,0.6798172,0.72863203,512,488,0.95710415,0.95710415 +st1788.png,knot,0.5836056,0.6444321,0.85830957,0.90954554,512,488,0.9565387,0.9565387 +st1795.png,NULL,0,0,0,0,512,488,0,0 +st1733.png,NULL,0,0,0,0,512,488,0,0 +st1393.png,NULL,0,0,0,0,512,488,0,0 +st1843.png,NULL,0,0,0,0,512,488,0,0 +st1682.png,defect,0.039656125,0.42582238,0.5525609,0.6027745,512,488,0.86834276,0.76895356 +st1682.png,defect,0.024935054,0.31917572,0.55124557,0.5962398,512,488,0.76895356,0.76895356 +st1385.png,NULL,0,0,0,0,512,488,0,0 +st1122.png,knot,0.5411203,0.6128086,0.58046335,0.6717515,512,488,0.9858913,0.98368675 +st1122.png,knot,0.056484826,0.11591608,0.5652503,0.6422804,512,488,0.98368675,0.98368675 +st1235.png,knot,0.1276616,0.19752865,0.480699,0.55614275,512,488,0.74746734,0.74746734 +st1524.png,NULL,0,0,0,0,512,488,0,0.05 +st1244.png,NULL,0,0,0,0,512,488,0,0 +st1508.png,NULL,0,0,0,0,512,488,0,0 +st1736.png,NULL,0,0,0,0,512,488,0,0 +st1321.png,NULL,0,0,0,0,512,488,0,0 +st1442.png,NULL,0,0,0,0,512,488,0,0 +st1200.png,knot,0.5654346,0.66832656,0.3173933,0.506531,512,488,0.9404969,0.89732975 +st1200.png,knot,0.5703329,0.64266706,0.56360215,0.63275886,512,488,0.89732975,0.89732975 +st1802.png,NULL,0,0,0,0,512,488,0,0 +st1041.png,NULL,0,0,0,0,512,488,0,0 +st1093.png,defect,0.029552897,0.5419622,0.5649241,0.6236252,512,488,0.7643857,0.7643857 +st1801.png,NULL,0,0,0,0,512,488,0,0 +st1012.png,knot,0.08644599,0.13153824,0.5904512,0.6404672,512,488,0.988781,0.9774973 +st1012.png,knot,0.812379,0.8715301,0.672171,0.7355979,512,488,0.9774973,0.9774973 +st1124.png,knot,0.58822644,0.6349528,0.6409561,0.67426634,512,488,0.8600671,0.8600671 +st1523.png,knot,0.4360306,0.48373103,0.7257771,0.760645,512,488,0.75625575,0.75625575 +st1316.png,NULL,0,0,0,0,512,488,0,0 
+st1250.png,NULL,0,0,0,0,512,488,0,0 +st1616.png,NULL,0,0,0,0,512,488,0,0 +st1227.png,knot,0.13855492,0.1769987,0.46492472,0.5240612,512,488,0.8713209,0.8713209 +st1813.png,knot,0.8311317,0.953618,0.9412942,1,512,488,0.95462394,0.9540965 +st1813.png,knot,0.24278454,0.2941218,0.89843416,0.9542148,512,488,0.9540965,0.9540965 +st1375.png,knot,0.6595113,0.7540372,0.24549623,0.33015925,512,488,0.75854486,0.75854486 +st1308.png,NULL,0,0,0,0,512,488,0,0 +st1436.png,knot,0.026985489,0.09129417,0.6343242,0.7144944,512,488,0.9840538,0.91817385 +st1436.png,knot,0.6789712,0.7982183,0.28795344,0.44787335,512,488,0.9764556,0.91817385 +st1436.png,knot,0.7039986,0.74745595,0.76152605,0.79802424,512,488,0.91817385,0.91817385 +st1753.png,NULL,0,0,0,0,512,488,0,0 +st1700.png,knot,0.12157553,0.15873633,0.8837781,0.9422516,512,488,0.9878225,0.9772215 +st1700.png,knot,0.5513875,0.6317189,0.6009193,0.6692112,512,488,0.9772215,0.9772215 +st1545.png,knot,0.29913685,0.3875719,0.66622776,0.75740665,512,488,0.892852,0.892852 +st1677.png,defect,0.15709856,0.4504473,0.5181499,0.5668961,512,488,0.7450831,0.72352904 +st1677.png,defect,0.05941242,0.42110217,0.5009454,0.5704264,512,488,0.72352904,0.72352904 +st1277.png,knot,0.35329628,0.3962914,0.4822781,0.5091933,512,488,0.95444745,0.8027073 +st1277.png,knot,0.52825636,0.56697357,0.23076525,0.256575,512,488,0.8027073,0.8027073 +st1422.png,knot,0.030929413,0.09668733,0.5367563,0.6217439,512,488,0.98173296,0.8505016 +st1422.png,defect,0.2963287,0.38398296,0.49390534,0.53890395,512,488,0.9370916,0.8505016 +st1422.png,knot,0.033933025,0.07376841,0.45551947,0.5043373,512,488,0.8505016,0.8505016 +st1536.png,NULL,0,0,0,0,512,488,0,0 +st1120.png,NULL,0,0,0,0,512,488,0,0 +st1493.png,knot,0.5058768,0.5699435,0.3648815,0.4515034,512,488,0.9947812,0.7626483 +st1493.png,knot,0.5272039,0.57829803,0.90566856,0.9900118,512,488,0.9907516,0.7626483 +st1493.png,defect,0.72455865,0.943826,0.27428144,0.31762776,512,488,0.7626483,0.7626483 +st1657.png,NULL,0,0,0,0,512,488,0,0 +st1369.png,NULL,0,0,0,0,512,488,0,0 +st1096.png,knot,0.14781785,0.21973783,0.87173665,0.9797294,512,488,0.9355302,0.82652843 +st1096.png,knot,0.016842283,0.088842124,0.21323693,0.2722416,512,488,0.82652843,0.82652843 +st1462.png,knot,0.37319168,0.4497309,0.43395936,0.53268087,512,488,0.88651085,0.88651085 +st1776.png,NULL,0,0,0,0,512,488,0,0 +st1705.png,knot,0.3318905,0.3849011,0.6956777,0.76191366,512,488,0.96945816,0.96945816 +st1582.png,knot,0.896007,0.9702874,0.4574962,0.55282426,512,488,0.9555228,0.9555228 +st1584.png,NULL,0,0,0,0,512,488,0,0 +st1119.png,NULL,0,0,0,0,512,488,0,0 +st1121.png,NULL,0,0,0,0,512,488,0,0 +st1470.png,NULL,0,0,0,0,512,488,0,0 +st1056.png,NULL,0,0,0,0,512,488,0,0 +st1281.png,knot,0.47483134,0.52562714,0.44982663,0.48879707,512,488,0.9294945,0.9294945 +st1033.png,knot,0.20481113,0.24327247,0.7391399,0.82345104,512,488,0.83181804,0.83181804 +st1394.png,NULL,0,0,0,0,512,488,0,0 +st1039.png,NULL,0,0,0,0,512,488,0,0 +st1611.png,knot,0.6326234,0.7054164,0.86741334,0.96444726,512,488,0.99616516,0.9843567 +st1611.png,knot,0.07399843,0.11282173,0.32572043,0.36819047,512,488,0.9843567,0.9843567 +st1815.png,NULL,0,0,0,0,512,488,0,0 +st1343.png,NULL,0,0,0,0,512,488,0,0 +st1639.png,NULL,0,0,0,0,512,488,0,0 +st1194.png,knot,0.6489157,0.7092218,0.73913777,0.8004431,512,488,0.98375535,0.98375535 +st1773.png,NULL,0,0,0,0,512,488,0,0 +st1381.png,NULL,0,0,0,0,512,488,0,0 +st1180.png,NULL,0,0,0,0,512,488,0,0 +st1564.png,defect,0.27094924,0.71608114,0.56096876,0.624119,512,488,0.86454993,0.7657204 
+st1564.png,defect,0.30570227,0.38637656,0.23720618,0.5720114,512,488,0.7657204,0.7657204 +st1076.png,knot,0.1993315,0.2713721,0.61612856,0.67154235,512,488,0.98148555,0.98148555 +st1449.png,NULL,0,0,0,0,512,488,0,0 +st1262.png,NULL,0,0,0,0,512,488,0,0 +st1547.png,NULL,0,0,0,0,512,488,0,0 +st1388.png,knot,0.77038956,0.8246681,0.2486437,0.3917786,512,488,0.8589068,0.7435218 +st1388.png,defect,0.028243389,0.3578855,0.5529958,0.61774576,512,488,0.7435218,0.7435218 +st1071.png,NULL,0,0,0,0,512,488,0,0 +st1319.png,NULL,0,0,0,0,512,488,0,0 +st1323.png,knot,0.27874127,0.33264512,0.78055125,0.82403004,512,488,0.9629542,0.9629542 +st1267.png,defect,0.005203705,0.18179724,0.5817473,0.62368834,512,488,0.95650816,0.8531658 +st1267.png,knot,0.76825047,0.8674093,0.46741802,0.57576823,512,488,0.8531658,0.8531658 +st1571.png,knot,0.18502116,0.23628128,0.82359684,0.8792989,512,488,0.9745147,0.9745147 +st1625.png,knot,0.8698924,0.9376181,0.32788292,0.40407893,512,488,0.9801192,0.8253587 +st1625.png,knot,0.12062892,0.21728584,0.50254875,0.6091943,512,488,0.9581743,0.8253587 +st1625.png,knot,0.8375989,0.9150144,0.8612594,0.95407206,512,488,0.9176986,0.8253587 +st1625.png,defect,0.94997066,1,0.30075625,0.34504828,512,488,0.8253587,0.8253587 +st1092.png,defect,0,0.5400781,0.5343204,0.60199744,512,488,0.8241516,0.78555936 +st1092.png,defect,0.2487825,0.9571989,0.5449516,0.6100018,512,488,0.78555936,0.78555936 +st1229.png,knot,0.005342853,0.04803483,0.59346676,0.6700139,512,488,0.7080499,0.7080499 +st1023.png,NULL,0,0,0,0,512,488,0,0 +st1840.png,knot,0.12036637,0.18497443,0.7618415,0.8283344,512,488,0.986,0.986 +st1840.png,knot,0.7297609,0.7755673,0.62443626,0.6670296,512,488,0.986,0.986 +st1840.png,defect,0.76513,0.9952971,0.6075407,0.6546806,512,488,0.986,0.986 +st1808.png,NULL,0,0,0,0,512,488,0,0 +st1491.png,NULL,0,0,0,0,512,488,0,0 +st1021.png,NULL,0,0,0,0,512,488,0,0 +st1421.png,knot,0.11980621,0.18726023,0.7555569,0.8244165,512,488,0.958447,0.958447 +st1339.png,NULL,0,0,0,0,512,488,0,0 +st1554.png,NULL,0,0,0,0,512,488,0,0 +st1481.png,knot,0.53876394,0.590091,0.36425796,0.41175127,512,488,0.96814036,0.96814036 +st1377.png,knot,0.05203633,0.09342611,0.8601645,0.9058688,512,488,0.9739779,0.9739779 +st1128.png,knot,0.80625194,0.91632825,0.55765307,0.7005993,512,488,0.9684247,0.8107647 +st1128.png,knot,0.32348785,0.36260453,0.6365056,0.67154855,512,488,0.9590423,0.8107647 +st1128.png,knot,0.072546415,0.11205525,0.3796195,0.42445257,512,488,0.95505,0.8107647 +st1128.png,defect,0.020768221,0.4429854,0.49122843,0.56238526,512,488,0.9002104,0.8107647 +st1128.png,defect,0.17511989,0.46910113,0.49637127,0.5440628,512,488,0.8107647,0.8107647 +st1127.png,knot,0.20359582,0.25129482,0.5636938,0.6114353,512,488,0.9720497,0.71256244 +st1127.png,knot,0.17509237,0.2645835,0.7619097,0.89300954,512,488,0.96749485,0.71256244 +st1127.png,knot,0.1909762,0.23535307,0.45002174,0.50459236,512,488,0.93286186,0.71256244 +st1127.png,knot,0.6537879,0.78765035,0.39515936,0.57093585,512,488,0.8413151,0.71256244 +st1127.png,knot,0.123352595,0.27046844,0.71307296,0.88086075,512,488,0.71256244,0.71256244 +st1017.png,knot,0.15934287,0.21258824,0.69495475,0.74923414,512,488,0.9906481,0.9513629 +st1017.png,knot,0.10266534,0.15678032,0.30125916,0.3437191,512,488,0.96538395,0.9513629 +st1017.png,knot,0.89806724,0.94799244,0.6149101,0.6703377,512,488,0.9513629,0.9513629 +st1649.png,NULL,0,0,0,0,512,488,0,0 +st1824.png,knot,0.16498344,0.23226959,0.7412715,0.8212256,512,488,0.9905476,0.9675445 
+st1824.png,knot,0.9254969,0.9730143,0.46670482,0.51139265,512,488,0.9675445,0.9675445 +st1789.png,NULL,0,0,0,0,512,488,0,0 +st1063.png,NULL,0,0,0,0,512,488,0,0 +st1445.png,knot,0.6443819,0.6869104,0.7897866,0.8300049,512,488,0.9764658,0.9764658 +st1519.png,NULL,0,0,0,0,512,488,0,0 +st1539.png,NULL,0,0,0,0,512,488,0,0 +st1441.png,NULL,0,0,0,0,512,488,0,0 +st1155.png,knot,0.056128483,0.12090374,0.67237306,0.71741337,512,488,0.98089087,0.98089087 +st1179.png,knot,0.45837882,0.5055271,0.91032165,0.9597935,512,488,0.97704417,0.97704417 +st1015.png,knot,0.4657949,0.5192351,0.77765274,0.83437693,512,488,0.99483126,0.9771674 +st1015.png,knot,0.62487704,0.6703066,0.582573,0.6338162,512,488,0.9771674,0.9771674 +st1315.png,NULL,0,0,0,0,512,488,0,0 +st1065.png,knot,0.17431675,0.22347072,0.23654391,0.2848744,512,488,0.9868074,0.7672756 +st1065.png,knot,0.761391,0.81125563,0.7722216,0.81279993,512,488,0.9718078,0.7672756 +st1065.png,knot,0.5324617,0.5758695,0.522541,0.5539788,512,488,0.7672756,0.7672756 +st1357.png,NULL,0,0,0,0,512,488,0,0 +st1667.png,knot,0.90539,1,0.75757515,0.95417035,512,488,0.9598757,0.9598757 +st1671.png,NULL,0,0,0,0,512,488,0,0 +st1516.png,knot,0.52866757,0.64624226,0.5859474,0.7330528,512,488,0.87794745,0.87794745 +st1101.png,NULL,0,0,0,0,512,488,0,0 +st1260.png,knot,0.008780143,0.06881653,0.55893314,0.6122557,512,488,0.96111435,0.96111435 +st1351.png,knot,0.37872162,0.43863806,0.55814743,0.6412034,512,488,0.9947301,0.97738373 +st1351.png,knot,0.7162861,0.782209,0.2965802,0.374671,512,488,0.9933106,0.97738373 +st1351.png,knot,0.82775426,0.8713944,0.5412964,0.5987785,512,488,0.97738373,0.97738373 +st1376.png,NULL,0,0,0,0,512,488,0,0 +st1186.png,knot,0.6058496,0.65804994,0.75309813,0.8104734,512,488,0.98136395,0.98136395 +st1538.png,NULL,0,0,0,0,512,488,0,0 +st1287.png,NULL,0,0,0,0,512,488,0,0 +st1053.png,NULL,0,0,0,0,512,488,0,0 +st1681.png,NULL,0,0,0,0,512,488,0,0 +st1190.png,defect,0.035275936,0.60065794,0.54249704,0.60351425,512,488,0.8525961,0.76389796 +st1190.png,defect,0.48787197,0.97275436,0.52801114,0.5940613,512,488,0.7868591,0.76389796 +st1190.png,defect,0.018175239,0.38031426,0.5527138,0.6027485,512,488,0.76389796,0.76389796 +st1628.png,NULL,0,0,0,0,512,488,0,0 +st1522.png,NULL,0,0,0,0,512,488,0,0 +st1270.png,knot,0.34678867,0.4117869,0.2727826,0.34167832,512,488,0.9446557,0.9446557 +st1659.png,NULL,0,0,0,0,512,488,0,0 +st1534.png,NULL,0,0,0,0,512,488,0,0 +st1029.png,NULL,0,0,0,0,512,488,0,0 +st1594.png,knot,0.33551028,0.38614926,0.16877243,0.21096465,512,488,0.987112,0.8777176 +st1594.png,defect,0.6975558,0.9963596,0.5279547,0.57945627,512,488,0.94186485,0.8777176 +st1594.png,defect,0.064073026,0.73349494,0.5247153,0.58300364,512,488,0.8777176,0.8777176 +st1257.png,NULL,0,0,0,0,512,488,0,0 +st1585.png,NULL,0,0,0,0,512,488,0,0 +st1805.png,NULL,0,0,0,0,512,488,0,0 +st1263.png,NULL,0,0,0,0,512,488,0,0 +st1265.png,NULL,0,0,0,0,512,488,0,0 +st1763.png,NULL,0,0,0,0,512,488,0,0 +st1458.png,NULL,0,0,0,0,512,488,0,0 +st1530.png,knot,0.53668815,0.5918307,0.8938515,0.94775677,512,488,0.9752803,0.9752803 +st1605.png,NULL,0,0,0,0,512,488,0,0 +st1604.png,NULL,0,0,0,0,512,488,0,0 +st1483.png,defect,0.774059,0.98008376,0.43718264,0.4840865,512,488,0.7574974,0.7574974 +st1302.png,NULL,0,0,0,0,512,488,0,0 +st1579.png,NULL,0,0,0,0,512,488,0,0 +st1694.png,NULL,0,0,0,0,512,488,0,0 +st1570.png,NULL,0,0,0,0,512,488,0,0 +st1711.png,knot,0.5873118,0.6285114,0.37589058,0.43903938,512,488,0.9746859,0.9138147 
+st1711.png,knot,0.57629526,0.61687636,0.7231382,0.76612574,512,488,0.9697566,0.9138147 +st1711.png,defect,0.32764342,0.5752018,0.54542994,0.5934744,512,488,0.9138147,0.9138147 +st1140.png,knot,0.8942638,0.9481075,0.5630682,0.6189817,512,488,0.97847044,0.97847044 +st1781.png,NULL,0,0,0,0,512,488,0,0 +st1348.png,knot,0,0.031571154,0.6337116,0.6920282,512,488,0.9581143,0.89654917 +st1348.png,knot,0.6687841,0.7174068,0.43589646,0.47504494,512,488,0.89654917,0.89654917 +st1615.png,NULL,0,0,0,0,512,488,0,0 +st1170.png,knot,0.4402049,0.49130502,0.8758248,0.931154,512,488,0.984606,0.9822368 +st1170.png,knot,0.81391335,0.86345357,0.76427454,0.82079476,512,488,0.9822368,0.9822368 +st1690.png,knot,0.19851859,0.24199493,0.3909187,0.42710394,512,488,0.9121909,0.9121909 +st1792.png,knot,0.23595089,0.29004455,0.2956416,0.3383845,512,488,0.9906427,0.910417 +st1792.png,knot,0.8010435,0.8440271,0.8672871,0.9091894,512,488,0.910417,0.910417 +st1803.png,NULL,0,0,0,0,512,488,0,0 +st1569.png,NULL,0,0,0,0,512,488,0,0 +st1551.png,knot,0.63619053,0.7649639,0.40705118,0.5653195,512,488,0.99102426,0.98297393 +st1551.png,knot,0.17823221,0.31843635,0.6092808,0.78658724,512,488,0.98297393,0.98297393 +st1660.png,NULL,0,0,0,0,512,488,0,0 +st1307.png,knot,0.19044122,0.22998337,0.2392788,0.29569614,512,488,0.9176673,0.75701463 +st1307.png,knot,0.4755669,0.5202824,0.24868643,0.29236877,512,488,0.8936444,0.75701463 +st1307.png,knot,0.756844,0.7962363,0.23921326,0.28953588,512,488,0.75701463,0.75701463 +st1330.png,NULL,0,0,0,0,512,488,0,0 +st1749.png,knot,0.29975393,0.38158545,0.47838268,0.55155176,512,488,0.98031086,0.98031086 +st1521.png,knot,0.46013328,0.5463568,0.4740795,0.5873207,512,488,0.9323549,0.9323549 +st1672.png,NULL,0,0,0,0,512,488,0,0 +st1809.png,NULL,0,0,0,0,512,488,0,0 +st1104.png,NULL,0,0,0,0,512,488,0,0 +st1586.png,NULL,0,0,0,0,512,488,0,0 +st1533.png,NULL,0,0,0,0,512,488,0,0 +st1727.png,NULL,0,0,0,0,512,488,0,0 +st1762.png,NULL,0,0,0,0,512,488,0,0 +st1561.png,NULL,0,0,0,0,512,488,0,0 +st1220.png,knot,0.12324664,0.16531894,0.88222283,0.9185721,512,488,0.7501036,0.7501036 +st1849.png,knot,0.8353676,0.9049195,0.9066991,0.9460937,512,488,0.88539225,0.88539225 +st1064.png,NULL,0,0,0,0,512,488,0,0 +st1304.png,NULL,0,0,0,0,512,488,0,0 +st1136.png,NULL,0,0,0,0,512,488,0,0 +st1842.png,NULL,0,0,0,0,512,488,0,0 +st1216.png,NULL,0,0,0,0,512,488,0,0 +st1478.png,knot,0.3499687,0.4842524,0.57836384,0.7447651,512,488,0.9697584,0.9697584 +st1772.png,NULL,0,0,0,0,512,488,0,0 +st1142.png,knot,0.10130471,0.1666631,0.93701386,1,512,488,0.99122393,0.96465534 +st1142.png,knot,0.42964286,0.48378333,0.73776567,0.79399115,512,488,0.9888548,0.96465534 +st1142.png,knot,0,0.025988707,0.42292097,0.4892505,512,488,0.96465534,0.96465534 +st1070.png,NULL,0,0,0,0,512,488,0,0 +st1683.png,defect,0.5329239,0.97495586,0.5084751,0.5724312,512,488,0.9590508,0.7539403 +st1683.png,defect,0.030474966,0.38542023,0.5292042,0.5737903,512,488,0.9506095,0.7539403 +st1683.png,knot,0.51778054,0.56286615,0.5672909,0.64855087,512,488,0.9187184,0.7539403 +st1683.png,knot,0.78114593,0.82849365,0.9175072,0.9523704,512,488,0.877884,0.7539403 +st1683.png,defect,0.56203073,0.6564255,0.18660906,0.4561023,512,488,0.7758399,0.7539403 +st1683.png,defect,0.5718204,0.9096621,0.51635915,0.56507355,512,488,0.7539403,0.7539403 +st1423.png,NULL,0,0,0,0,512,488,0,0 +st1787.png,NULL,0,0,0,0,512,488,0,0 +st1692.png,NULL,0,0,0,0,512,488,0,0 +st1848.png,NULL,0,0,0,0,512,488,0,0 +st1055.png,NULL,0,0,0,0,512,488,0,0 
+st1026.png,knot,0.2674017,0.35383838,0.39859554,0.50976944,512,488,0.9884343,0.96366304 +st1026.png,knot,0.69417506,0.744075,0.34379873,0.39051458,512,488,0.97863936,0.96366304 +st1026.png,defect,0.70078284,0.9907891,0.5857268,0.6470487,512,488,0.96366304,0.96366304 +st1826.png,NULL,0,0,0,0,512,488,0,0 +st1047.png,knot,0.3542995,0.45425007,0.5452347,0.67343616,512,488,0.98686105,0.85297066 +st1047.png,defect,0.12903255,0.31020302,0.31878722,0.3649246,512,488,0.85297066,0.85297066 +st1751.png,NULL,0,0,0,0,512,488,0,0 +st1786.png,NULL,0,0,0,0,512,488,0,0 +st1069.png,NULL,0,0,0,0,512,488,0,0 +st1037.png,knot,0.6725518,0.7388108,0.6160668,0.68687695,512,488,0.9883166,0.71845925 +st1037.png,knot,0.15919425,0.23021916,0.60911787,0.681755,512,488,0.9800915,0.71845925 +st1037.png,defect,0.005260315,0.34612125,0.51574904,0.5804733,512,488,0.79289407,0.71845925 +st1037.png,defect,0.018672304,0.322812,0.5305951,0.57148284,512,488,0.71845925,0.71845925 +st1821.png,knot,0.11944925,0.19018118,0.9087665,0.9943758,512,488,0.99410444,0.9670556 +st1821.png,knot,0.59818923,0.65603995,0.6801038,0.7442376,512,488,0.9670556,0.9670556 +st1364.png,NULL,0,0,0,0,512,488,0,0 +st1192.png,knot,0.39846605,0.45543727,0.36765742,0.4488806,512,488,0.99612194,0.7127546 +st1192.png,defect,0.07790943,0.44866413,0.5975798,0.640683,512,488,0.80447847,0.7127546 +st1192.png,defect,0.47953823,0.7499259,0.5517361,0.59940904,512,488,0.7127546,0.7127546 +st1602.png,knot,0.9574499,1,0.42608044,0.4791277,512,488,0.72733927,0.72733927 +st1379.png,knot,0.29298508,0.43081832,0.5901556,0.77116805,512,488,0.9793032,0.9793032 +st1440.png,NULL,0,0,0,0,512,488,0,0 +st1719.png,NULL,0,0,0,0,512,488,0,0 +st1716.png,NULL,0,0,0,0,512,488,0,0 +st1108.png,knot,0.69631505,0.73806465,0.6270994,0.6722544,512,488,0.9886671,0.90558296 +st1108.png,knot,0.6181019,0.69406986,0.23797426,0.29145443,512,488,0.9754843,0.90558296 +st1108.png,knot,0.12284262,0.15919161,0.6235059,0.686582,512,488,0.9687026,0.90558296 +st1108.png,knot,0.29641727,0.3453667,0.5215502,0.5541794,512,488,0.90558296,0.90558296 +st1798.png,NULL,0,0,0,0,512,488,0,0 +st1596.png,knot,0.7262323,0.7746997,0.23982525,0.2841002,512,488,0.9764713,0.8792314 +st1596.png,defect,0,0.18872425,0.6252165,0.6738723,512,488,0.8792314,0.8792314 +st1107.png,NULL,0,0,0,0,512,488,0,0 +st1593.png,NULL,0,0,0,0,512,488,0,0 +st1740.png,knot,0.2112985,0.2654443,0.6285077,0.6783256,512,488,0.97739637,0.97739637 +st1463.png,knot,0.46628618,0.5137217,0.5166115,0.5817609,512,488,0.90653586,0.90653586 +st1249.png,knot,0.67133003,0.7416619,0.56961924,0.63172895,512,488,0.7778287,0.7778287 +st1292.png,knot,0.20523047,0.32447684,0.19784732,0.3516566,512,488,0.98957986,0.9779925 +st1292.png,knot,0.76388824,0.861159,0.8165224,0.9411841,512,488,0.9779925,0.9779925 +st1232.png,NULL,0,0,0,0,512,488,0,0 +st1734.png,NULL,0,0,0,0,512,488,0,0 +st1332.png,NULL,0,0,0,0,512,488,0,0 +st1799.png,NULL,0,0,0,0,512,488,0,0 +st1696.png,knot,0.5082208,0.55826336,0.40370333,0.4456584,512,488,0.9750294,0.9750294 +st1353.png,knot,0.20250683,0.26231235,0.70863116,0.778109,512,488,0.99471915,0.87796557 +st1353.png,knot,0.9440131,0.9763329,0.7736546,0.83807546,512,488,0.87796557,0.87796557 +st1042.png,NULL,0,0,0,0,512,488,0,0 +st1088.png,knot,0.63853097,0.70713216,0.5170407,0.5931604,512,488,0.99378115,0.97339106 +st1088.png,knot,0.016109794,0.12432815,0.6511545,0.8050783,512,488,0.97339106,0.97339106 +st1764.png,NULL,0,0,0,0,512,488,0,0 +st1706.png,NULL,0,0,0,0,512,488,0,0 +st1555.png,NULL,0,0,0,0,512,488,0,0 
+st1157.png,knot,0.55773807,0.6353789,0.796305,0.90967226,512,488,0.98938215,0.97597164 +st1157.png,knot,0.6030029,0.70828706,0.46024242,0.56399477,512,488,0.97597164,0.97597164 +st1310.png,NULL,0,0,0,0,512,488,0,0 +st1331.png,NULL,0,0,0,0,512,488,0,0 +st1137.png,knot,0.5549138,0.655619,0.6309172,0.75291824,512,488,0.98047423,0.9802043 +st1137.png,knot,0.6839061,0.796325,0.28546476,0.4368632,512,488,0.9802043,0.9802043 +st1311.png,knot,0.17855604,0.22768492,0.59137386,0.6507016,512,488,0.96903574,0.96903574 +st1466.png,NULL,0,0,0,0,512,488,0,0 +st1051.png,knot,0.7411262,0.8467681,0.33830678,0.44452292,512,488,0.9951338,0.74332273 +st1051.png,knot,0.5914114,0.62914854,0.30536783,0.32768086,512,488,0.74332273,0.74332273 +st1830.png,knot,0.2591614,0.3020172,0.30837727,0.3550865,512,488,0.9754875,0.96484137 +st1830.png,knot,0.88722545,0.9334164,0.37343535,0.4335522,512,488,0.96484137,0.96484137 +st1040.png,NULL,0,0,0,0,512,488,0,0 +st1556.png,knot,0.5110332,0.56798416,0.5350004,0.59305537,512,488,0.9750122,0.7373006 +st1556.png,defect,0.12589139,0.23129989,0.5029541,0.5395938,512,488,0.7373006,0.7373006 +st1702.png,NULL,0,0,0,0,512,488,0,0 +st1038.png,knot,0.31787464,0.36652422,0.33077985,0.3723501,512,488,0.97705007,0.97705007 +st1335.png,NULL,0,0,0,0,512,488,0,0 +st1202.png,knot,0.7601946,0.8447372,0.46703416,0.56048256,512,488,0.94609445,0.94609445 +st1518.png,knot,0.56253266,0.6819586,0.4816731,0.61785614,512,488,0.97566086,0.87584656 +st1518.png,knot,0.106342345,0.17423195,0.41895986,0.48053488,512,488,0.8768969,0.87584656 +st1518.png,knot,0.7848372,0.84227246,0.61229557,0.64354527,512,488,0.87584656,0.87584656 diff --git a/test/totag_source.csv b/test/totag_source.csv new file mode 100644 index 00000000..fc6e5d9e --- /dev/null +++ b/test/totag_source.csv @@ -0,0 +1,564 @@ +filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence +st1425.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1507.png,knot,0.30267757,0.36275476,0.30722874,0.3859462,512,488,board_images_png,0.96844035,0.96844035 +st1658.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1280.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1574.png,knot,0.1320003,0.17829065,0.35484976,0.41822958,512,488,board_images_png,0.9887079,0.926775 +st1574.png,knot,0.8644475,0.9901977,0.19784021,0.3901529,512,488,board_images_png,0.9740087,0.926775 +st1574.png,knot,0.94484687,1.0,0.710199,0.8731014,512,488,board_images_png,0.926775,0.926775 +st1550.png,knot,0.12705852,0.25362155,0.29243648,0.4436928,512,488,board_images_png,0.97203135,0.97203135 +st1532.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1300.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1032.png,defect,0.42934287,0.7599493,0.5304596,0.5905363,512,488,board_images_png,0.96906316,0.72713023 +st1032.png,defect,0.2200782,0.27667534,0.18268493,0.53899956,512,488,board_images_png,0.72713023,0.72713023 +st1758.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1370.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1715.png,knot,0.5078927,0.58547044,0.3985074,0.47241923,512,488,board_images_png,0.8088237,0.8088237 +st1614.png,knot,0.58140886,0.77096295,0.76956195,0.9438482,512,488,board_images_png,0.89606684,0.89606684 +st1346.png,knot,0.32081026,0.38056508,0.31275535,0.38130763,512,488,board_images_png,0.9854725,0.9689765 +st1346.png,knot,0.8616573,0.92554694,0.37297934,0.4253001,512,488,board_images_png,0.9689765,0.9689765 +st1558.png,defect,0.3651751,0.6520672,0.3140911,0.37667334,512,488,board_images_png,0.752485,0.752485 
+st1022.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1067.png,knot,0.36342445,0.40953362,0.4274848,0.5127849,512,488,board_images_png,0.98211503,0.8958246 +st1067.png,defect,0.61802673,0.99210936,0.5602096,0.6185947,512,488,board_images_png,0.96133125,0.8958246 +st1067.png,knot,0.29145327,0.34137663,0.4940968,0.5431678,512,488,board_images_png,0.8958246,0.8958246 +st1479.png,knot,0.12793025,0.2188721,0.3734488,0.51437014,512,488,board_images_png,0.9717827,0.88874847 +st1479.png,knot,0.7373435,0.7909296,0.40912244,0.44390976,512,488,board_images_png,0.88874847,0.88874847 +st1368.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1154.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1760.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1780.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1395.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1567.png,knot,0.19071558,0.247627,0.43410447,0.4907445,512,488,board_images_png,0.89100105,0.89100105 +st1626.png,knot,0.87445205,0.9559347,0.9241024,0.99402934,512,488,board_images_png,0.9514488,0.8882066 +st1626.png,knot,0.11829056,0.22432978,0.63148624,0.80027825,512,488,board_images_png,0.8882066,0.8882066 +st1383.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1386.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1340.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1185.png,knot,0.6978268,0.7582275,0.66821593,0.7535644,512,488,board_images_png,0.97257924,0.7035888 +st1185.png,defect,0.35780182,0.60781866,0.27580062,0.32093963,512,488,board_images_png,0.9720861,0.7035888 +st1185.png,knot,0.5183983,0.57071316,0.84764653,0.91617334,512,488,board_images_png,0.9241496,0.7035888 +st1185.png,knot,0.55567926,0.5904746,0.51832056,0.5461106,512,488,board_images_png,0.7035888,0.7035888 +st1174.png,knot,0.22525154,0.42907882,0.69679314,0.9117324,512,488,board_images_png,0.9682847,0.9682847 +st1228.png,knot,0.13831148,0.18676595,0.5025294,0.57187015,512,488,board_images_png,0.9633739,0.9633739 +st1211.png,knot,0.33376285,0.3789052,0.49142396,0.5399416,512,488,board_images_png,0.99031806,0.97394437 +st1211.png,knot,0.61613625,0.6627939,0.5496773,0.6076902,512,488,board_images_png,0.9841182,0.97394437 +st1211.png,knot,0.5185557,0.563689,0.800335,0.838637,512,488,board_images_png,0.9770195,0.97394437 +st1211.png,knot,0.29164347,0.32961696,0.76875347,0.80985427,512,488,board_images_png,0.97394437,0.97394437 +st1409.png,knot,0.50456727,0.55044687,0.43021923,0.5008391,512,488,board_images_png,0.98137134,0.8897141 +st1409.png,knot,0.55252427,0.5947376,0.59299654,0.6804634,512,488,board_images_png,0.90360755,0.8897141 +st1409.png,defect,0.02457425,0.1723926,0.56370574,0.62124217,512,488,board_images_png,0.8897141,0.8897141 +st1811.png,knot,0.06933522,0.12140793,0.56051254,0.6089163,512,488,board_images_png,0.88610995,0.88610995 +st1219.png,knot,0.27650398,0.42174247,0.3864263,0.5609011,512,488,board_images_png,0.969893,0.7928644 +st1219.png,knot,0.88806176,0.9325516,0.5853674,0.6220557,512,488,board_images_png,0.7928644,0.7928644 +st1731.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1412.png,knot,0.34901053,0.4057406,0.52082306,0.58607656,512,488,board_images_png,0.994964,0.80384046 +st1412.png,knot,0.88786006,0.93988264,0.86172235,0.9108845,512,488,board_images_png,0.95610106,0.80384046 +st1412.png,knot,0.8434941,0.87894285,0.5202804,0.560757,512,488,board_images_png,0.80384046,0.80384046 +st1418.png,knot,0.12883854,0.19043617,0.6020754,0.65327597,512,488,board_images_png,0.97144216,0.97144216 
+st1563.png,knot,0.38526446,0.44112217,0.66772294,0.7896756,512,488,board_images_png,0.87763524,0.87763524 +st1746.png,knot,0.8832216,0.93555427,0.42813456,0.48943433,512,488,board_images_png,0.91031706,0.91031706 +st1526.png,defect,0.3677499,0.9344856,0.52128524,0.5916721,512,488,board_images_png,0.77248275,0.77248275 +st1054.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1133.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1132.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1156.png,knot,0.74468595,0.7872824,0.6505758,0.7120241,512,488,board_images_png,0.9787052,0.8870228 +st1156.png,knot,0.016042642,0.1036161,0.624759,0.74122274,512,488,board_images_png,0.9766431,0.8870228 +st1156.png,knot,0.84342957,0.8916702,0.29592016,0.3381421,512,488,board_images_png,0.8870228,0.8870228 +st1298.png,knot,0.4514519,0.5046197,0.77640456,0.81230736,512,488,board_images_png,0.8977459,0.8977459 +st1474.png,knot,0.0050943564,0.0965749,0.73857075,0.857727,512,488,board_images_png,0.94921297,0.94921297 +st1285.png,knot,0.120333195,0.16538988,0.42999956,0.4713941,512,488,board_images_png,0.9724019,0.8775609 +st1285.png,knot,0.7336784,0.8959469,0.42097697,0.59948033,512,488,board_images_png,0.8775609,0.8775609 +st1256.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1540.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1850.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1546.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1737.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1374.png,knot,0.007972765,0.14858767,0.3385822,0.53610593,512,488,board_images_png,0.9833903,0.9833903 +st1583.png,knot,0.17383476,0.2230064,0.4013973,0.4348171,512,488,board_images_png,0.80748755,0.80748755 +st1652.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1233.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1264.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1106.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1253.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1752.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1817.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1158.png,knot,0.5864669,0.709107,0.57217103,0.70062774,512,488,board_images_png,0.8354658,0.8354658 +st1591.png,knot,0.55157113,0.59348816,0.3838704,0.42843914,512,488,board_images_png,0.9774615,0.9774615 +st1019.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1756.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1814.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1411.png,defect,0.4562817,0.74260026,0.41583094,0.47507042,512,488,board_images_png,0.9707597,0.7683885 +st1411.png,defect,0.33153504,0.38562804,0.54801345,0.90057915,512,488,board_images_png,0.7683885,0.7683885 +st1437.png,knot,0.87057835,0.93585074,0.42636645,0.47993812,512,488,board_images_png,0.99364483,0.90721714 +st1437.png,knot,0.38025564,0.4292948,0.8327625,0.875494,512,488,board_images_png,0.9815393,0.90721714 +st1437.png,knot,0.86802167,0.90930617,0.6626619,0.7106249,512,488,board_images_png,0.9466927,0.90721714 +st1437.png,knot,0.30429596,0.345514,0.46304542,0.49877295,512,488,board_images_png,0.9149384,0.90721714 +st1437.png,knot,0.44645458,0.48949313,0.5727815,0.6101881,512,488,board_images_png,0.90721714,0.90721714 +st1312.png,knot,0.9355495,0.9885366,0.55922663,0.6121081,512,488,board_images_png,0.97851986,0.97851986 +st1246.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1714.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1618.png,knot,0.64436,0.68662965,0.34913963,0.39694545,512,488,board_images_png,0.9157832,0.8893132 
+st1618.png,knot,0.21418211,0.248939,0.40383893,0.45200813,512,488,board_images_png,0.8893132,0.8893132 +st1804.png,defect,0.6402425,0.8739144,0.7831069,0.8384883,512,488,board_images_png,0.96215266,0.7692557 +st1804.png,defect,0.4554133,0.6682096,0.88511723,0.9596481,512,488,board_images_png,0.9468119,0.7692557 +st1804.png,knot,0.69052243,0.7642171,0.316173,0.45031247,512,488,board_images_png,0.7692557,0.7692557 +st1419.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1489.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1160.png,knot,0.13819394,0.19427188,0.35991684,0.4188612,512,488,board_images_png,0.99321955,0.88587844 +st1160.png,knot,0.93188983,0.9892542,0.89026076,0.93819654,512,488,board_images_png,0.88587844,0.88587844 +st1217.png,knot,0.5789819,0.7600379,0.6990706,0.92517173,512,488,board_images_png,0.965711,0.965711 +st1735.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1482.png,knot,0.7225375,0.760401,0.39166692,0.4188348,512,488,board_images_png,0.7819961,0.7819961 +st1844.png,knot,0.2735705,0.40624326,0.20760712,0.42487168,512,488,board_images_png,0.98871547,0.93331075 +st1844.png,knot,0.9521196,0.995074,0.34867543,0.4020578,512,488,board_images_png,0.93331075,0.93331075 +st1488.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1382.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1529.png,knot,0.9444826,0.9944176,0.2133748,0.26350862,512,488,board_images_png,0.96461403,0.96461403 +st1427.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1356.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1341.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1384.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1273.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1480.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1240.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1147.png,knot,0.7235606,0.83558035,0.27972806,0.40932456,512,488,board_images_png,0.91702497,0.728294 +st1147.png,knot,0.7145754,0.90363944,0.28619885,0.41697603,512,488,board_images_png,0.728294,0.728294 +st1581.png,knot,0.8395566,0.90542674,0.66599536,0.75553393,512,488,board_images_png,0.9763974,0.9763974 +st1360.png,knot,0.87198615,0.9385322,0.47307807,0.55335397,512,488,board_images_png,0.98329365,0.98329365 +st1398.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1827.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1028.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1504.png,knot,0.7023221,0.7615038,0.34336826,0.42820725,512,488,board_images_png,0.95297855,0.89665526 +st1504.png,knot,0.6145234,0.7415803,0.8845993,1.0,512,488,board_images_png,0.9271189,0.89665526 +st1504.png,defect,0.0021485805,0.27988404,0.5505484,0.59185165,512,488,board_images_png,0.89665526,0.89665526 +st1578.png,knot,0.594302,0.6663906,0.35276932,0.43525606,512,488,board_images_png,0.98448783,0.98448783 +st1036.png,knot,0.028068142,0.0839983,0.806439,0.85608625,512,488,board_images_png,0.9128011,0.9016027 +st1036.png,knot,0.42376837,0.5876269,0.52140605,0.7059735,512,488,board_images_png,0.9016027,0.9016027 +st1499.png,knot,0.5809746,0.6119887,0.4228858,0.47573146,512,488,board_images_png,0.8952005,0.7930336 +st1499.png,knot,0.7553885,0.79633987,0.7965046,0.83087444,512,488,board_images_png,0.7930336,0.7930336 +st1090.png,knot,0.5578696,0.7180711,0.30727062,0.53313506,512,488,board_images_png,0.94313323,0.81639516 +st1090.png,defect,0.73136926,0.8486861,0.6013503,0.6512936,512,488,board_images_png,0.81639516,0.81639516 
+st1024.png,defect,0.112691626,0.44226354,0.5310191,0.5835047,512,488,board_images_png,0.87372416,0.7271544 +st1024.png,defect,0.40821457,0.9742286,0.52778643,0.58860064,512,488,board_images_png,0.7271544,0.7271544 +st1601.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1730.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1328.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1404.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1234.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1130.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1432.png,knot,0.2931829,0.3458625,0.56240505,0.60282475,512,488,board_images_png,0.98519933,0.874988 +st1432.png,knot,0.29956195,0.36768192,0.86441845,0.962151,512,488,board_images_png,0.97217214,0.874988 +st1432.png,knot,0.16986336,0.20972233,0.73092186,0.7836456,512,488,board_images_png,0.9664343,0.874988 +st1432.png,knot,0.777344,0.8248272,0.59483975,0.6455976,512,488,board_images_png,0.874988,0.874988 +st1701.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1123.png,knot,0.2767068,0.32175153,0.46701193,0.506633,512,488,board_images_png,0.97385967,0.9535659 +st1123.png,knot,0.7109091,0.7986061,0.6256758,0.7137536,512,488,board_images_png,0.9535659,0.9535659 +st1283.png,knot,0.098313406,0.1568827,0.64341545,0.6876629,512,488,board_images_png,0.9695637,0.92305833 +st1283.png,knot,0.19095805,0.23581375,0.21043442,0.2393901,512,488,board_images_png,0.9597842,0.92305833 +st1283.png,knot,0.78956836,0.9321685,0.43591335,0.615597,512,488,board_images_png,0.92305833,0.92305833 +st1566.png,knot,0.20809694,0.25938302,0.8333139,0.881473,512,488,board_images_png,0.9220271,0.9220271 +st1184.png,knot,0.5389529,0.5887932,0.88446933,0.94065404,512,488,board_images_png,0.98569345,0.9756015 +st1184.png,knot,0.7372067,0.7950743,0.43428457,0.5021385,512,488,board_images_png,0.9756015,0.9756015 +st1325.png,knot,0.41096783,0.4686864,0.3719188,0.4173787,512,488,board_images_png,0.97178966,0.71758956 +st1325.png,defect,0.45774087,0.57515985,0.47131798,0.507235,512,488,board_images_png,0.71758956,0.71758956 +st1486.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1606.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1704.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1134.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1517.png,knot,0.74200016,0.81116307,0.43382335,0.50637746,512,488,board_images_png,0.8954352,0.8954352 +st1225.png,knot,0.2011443,0.41020003,0.23063861,0.3502846,512,488,board_images_png,0.9019703,0.9019703 +st1342.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1294.png,knot,0.5556125,0.6112592,0.3199844,0.37132874,512,488,board_images_png,0.9663085,0.9663085 +st1099.png,knot,0.38057646,0.43379363,0.64525,0.69284534,512,488,board_images_png,0.97903275,0.94032264 +st1099.png,knot,0.783422,0.8516338,0.75065935,0.8024257,512,488,board_images_png,0.94032264,0.94032264 +st1485.png,knot,0.5084776,0.5605646,0.29666436,0.3375187,512,488,board_images_png,0.9734903,0.9734903 +st1778.png,knot,0.33641994,0.40676555,0.5519531,0.6495054,512,488,board_images_png,0.9951767,0.92300224 +st1778.png,knot,0.82943386,0.95425886,0.8764082,0.9973693,512,488,board_images_png,0.92300224,0.92300224 +st1255.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1282.png,knot,0.7397333,0.8512135,0.60256326,0.795918,512,488,board_images_png,0.97338796,0.9186734 +st1282.png,knot,0.00065759657,0.21586742,0.19925785,0.47003567,512,488,board_images_png,0.9186734,0.9186734 +st1492.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 
+st1502.png,knot,0.38450775,0.48494667,0.46433184,0.57277703,512,488,board_images_png,0.7445761,0.7445761 +st1301.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1221.png,knot,0.82906234,0.87590575,0.29823613,0.34273428,512,488,board_images_png,0.9677968,0.89999956 +st1221.png,knot,0.5514687,0.67128026,0.23506156,0.39411125,512,488,board_images_png,0.94143856,0.89999956 +st1221.png,knot,0.16929382,0.3767521,0.82198226,0.967369,512,488,board_images_png,0.89999956,0.89999956 +st1313.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1725.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1501.png,defect,0.028466891,0.43053594,0.56010455,0.61603063,512,488,board_images_png,0.788139,0.788139 +st1469.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1030.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1754.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1224.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1846.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1345.png,knot,0.6995347,0.75696117,0.5753143,0.6314817,512,488,board_images_png,0.97813386,0.97813386 +st1691.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1049.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1407.png,knot,0.11778457,0.16466995,0.70767486,0.7456024,512,488,board_images_png,0.9189501,0.8215379 +st1407.png,defect,0.18611541,0.62973213,0.24472722,0.523277,512,488,board_images_png,0.8215379,0.8215379 +st1761.png,knot,0.090693764,0.13790727,0.6114098,0.69016904,512,488,board_images_png,0.98304546,0.98304546 +st1434.png,knot,0.23485328,0.28222427,0.55493677,0.59290344,512,488,board_images_png,0.92389,0.74063563 +st1434.png,defect,0.84828746,0.8820617,0.6607343,0.8046339,512,488,board_images_png,0.74063563,0.74063563 +st1338.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1748.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1372.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1738.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1505.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1236.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1630.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1794.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1114.png,knot,0.9400154,0.9931325,0.5959491,0.6464846,512,488,board_images_png,0.89500606,0.89500606 +st1172.png,knot,0.07152845,0.12519489,0.88483614,0.9465262,512,488,board_images_png,0.9098522,0.9098522 +st1131.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1193.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1514.png,defect,0.12568697,0.69539475,0.5725874,0.62920445,512,488,board_images_png,0.8348586,0.8212201 +st1514.png,knot,0.9272249,0.9948062,0.6711885,0.8692283,512,488,board_images_png,0.8212201,0.8212201 +st1139.png,knot,0.56677705,0.64111996,0.6869818,0.77499914,512,488,board_images_png,0.9921335,0.9774146 +st1139.png,knot,0.29908285,0.39024782,0.93987906,1.0,512,488,board_images_png,0.9774146,0.9774146 +st1020.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1153.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1275.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1320.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1451.png,knot,0.37755623,0.45656112,0.65385526,0.74539596,512,488,board_images_png,0.8936781,0.8936781 +st1111.png,knot,0.69098425,0.7365354,0.4256106,0.4817522,512,488,board_images_png,0.9783952,0.97570944 +st1111.png,knot,0.11195667,0.16339019,0.43002528,0.47924435,512,488,board_images_png,0.97570944,0.97570944 
+st1497.png,knot,0.14220633,0.20099886,0.28387493,0.35495216,512,488,board_images_png,0.9926939,0.94570374 +st1497.png,knot,0.13923028,0.1923464,0.7384727,0.81842715,512,488,board_images_png,0.99130225,0.94570374 +st1497.png,knot,0.6346491,0.67967534,0.5156564,0.5566579,512,488,board_images_png,0.94570374,0.94570374 +st1510.png,knot,0.8977143,0.9465768,0.47015342,0.51805454,512,488,board_images_png,0.94509804,0.94509804 +st1829.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1091.png,knot,0.20989896,0.251748,0.34986168,0.3921352,512,488,board_images_png,0.99201256,0.70161 +st1091.png,knot,0.696119,0.7461088,0.27078417,0.33086362,512,488,board_images_png,0.9827361,0.70161 +st1091.png,knot,0.89531857,0.93743694,0.4605299,0.5066802,512,488,board_images_png,0.9794672,0.70161 +st1091.png,defect,0.7629506,1.0,0.6205898,0.67307687,512,488,board_images_png,0.74762243,0.70161 +st1091.png,knot,0.14214082,0.247842,0.7355515,0.8967391,512,488,board_images_png,0.7072498,0.70161 +st1091.png,defect,0.0,0.1281265,0.55038965,0.59755194,512,488,board_images_png,0.70161,0.70161 +st1475.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1358.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1215.png,knot,0.19370292,0.24040401,0.679389,0.7466115,512,488,board_images_png,0.98310626,0.9192501 +st1215.png,knot,0.4819632,0.5332109,0.61278045,0.6552351,512,488,board_images_png,0.98147905,0.9192501 +st1215.png,knot,0.51983714,0.5659016,0.83619606,0.8828397,512,488,board_images_png,0.9192501,0.9192501 +st1670.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1428.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1549.png,knot,0.63825864,0.7578076,0.60389596,0.76869655,512,488,board_images_png,0.9821302,0.94119686 +st1549.png,knot,0.20611124,0.34173003,0.42097017,0.57046175,512,488,board_images_png,0.94119686,0.94119686 +st1274.png,knot,0.48306307,0.52941614,0.33974242,0.37156188,512,488,board_images_png,0.89189124,0.89189124 +st1732.png,knot,0.7792275,0.8772611,0.27241305,0.33139244,512,488,board_images_png,0.82892925,0.82892925 +st1728.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1490.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1511.png,knot,0.8874813,0.9360236,0.6798172,0.72863203,512,488,board_images_png,0.95710415,0.95710415 +st1788.png,knot,0.5836056,0.6444321,0.85830957,0.90954554,512,488,board_images_png,0.9565387,0.9565387 +st1795.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1733.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1393.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1843.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1682.png,defect,0.039656125,0.42582238,0.5525609,0.6027745,512,488,board_images_png,0.86834276,0.76895356 +st1682.png,defect,0.024935054,0.31917572,0.55124557,0.5962398,512,488,board_images_png,0.76895356,0.76895356 +st1385.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1122.png,knot,0.5411203,0.6128086,0.58046335,0.6717515,512,488,board_images_png,0.9858913,0.98368675 +st1122.png,knot,0.056484826,0.11591608,0.5652503,0.6422804,512,488,board_images_png,0.98368675,0.98368675 +st1235.png,knot,0.1276616,0.19752865,0.480699,0.55614275,512,488,board_images_png,0.74746734,0.74746734 +st1524.png,NULL,0,0,0,0,512,488,board_images_png,0,0.05 +st1244.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1508.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1736.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1321.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1442.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 
+st1200.png,knot,0.5654346,0.66832656,0.3173933,0.506531,512,488,board_images_png,0.9404969,0.89732975 +st1200.png,knot,0.5703329,0.64266706,0.56360215,0.63275886,512,488,board_images_png,0.89732975,0.89732975 +st1802.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1041.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1093.png,defect,0.029552897,0.5419622,0.5649241,0.6236252,512,488,board_images_png,0.7643857,0.7643857 +st1801.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1012.png,knot,0.08644599,0.13153824,0.5904512,0.6404672,512,488,board_images_png,0.988781,0.9774973 +st1012.png,knot,0.812379,0.8715301,0.672171,0.7355979,512,488,board_images_png,0.9774973,0.9774973 +st1124.png,knot,0.58822644,0.6349528,0.6409561,0.67426634,512,488,board_images_png,0.8600671,0.8600671 +st1523.png,knot,0.4360306,0.48373103,0.7257771,0.760645,512,488,board_images_png,0.75625575,0.75625575 +st1316.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1250.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1616.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1227.png,knot,0.13855492,0.1769987,0.46492472,0.5240612,512,488,board_images_png,0.8713209,0.8713209 +st1813.png,knot,0.8311317,0.953618,0.9412942,1.0,512,488,board_images_png,0.95462394,0.9540965 +st1813.png,knot,0.24278454,0.2941218,0.89843416,0.9542148,512,488,board_images_png,0.9540965,0.9540965 +st1375.png,knot,0.6595113,0.7540372,0.24549623,0.33015925,512,488,board_images_png,0.75854486,0.75854486 +st1308.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1436.png,knot,0.026985489,0.09129417,0.6343242,0.7144944,512,488,board_images_png,0.9840538,0.91817385 +st1436.png,knot,0.6789712,0.7982183,0.28795344,0.44787335,512,488,board_images_png,0.9764556,0.91817385 +st1436.png,knot,0.7039986,0.74745595,0.76152605,0.79802424,512,488,board_images_png,0.91817385,0.91817385 +st1753.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1700.png,knot,0.12157553,0.15873633,0.8837781,0.9422516,512,488,board_images_png,0.9878225,0.9772215 +st1700.png,knot,0.5513875,0.6317189,0.6009193,0.6692112,512,488,board_images_png,0.9772215,0.9772215 +st1545.png,knot,0.29913685,0.3875719,0.66622776,0.75740665,512,488,board_images_png,0.892852,0.892852 +st1677.png,defect,0.15709856,0.4504473,0.5181499,0.5668961,512,488,board_images_png,0.7450831,0.72352904 +st1677.png,defect,0.05941242,0.42110217,0.5009454,0.5704264,512,488,board_images_png,0.72352904,0.72352904 +st1277.png,knot,0.35329628,0.3962914,0.4822781,0.5091933,512,488,board_images_png,0.95444745,0.8027073 +st1277.png,knot,0.52825636,0.56697357,0.23076525,0.256575,512,488,board_images_png,0.8027073,0.8027073 +st1422.png,knot,0.030929413,0.09668733,0.5367563,0.6217439,512,488,board_images_png,0.98173296,0.8505016 +st1422.png,defect,0.2963287,0.38398296,0.49390534,0.53890395,512,488,board_images_png,0.9370916,0.8505016 +st1422.png,knot,0.033933025,0.07376841,0.45551947,0.5043373,512,488,board_images_png,0.8505016,0.8505016 +st1536.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1120.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1493.png,knot,0.5058768,0.5699435,0.3648815,0.4515034,512,488,board_images_png,0.9947812,0.7626483 +st1493.png,knot,0.5272039,0.57829803,0.90566856,0.9900118,512,488,board_images_png,0.9907516,0.7626483 +st1493.png,defect,0.72455865,0.943826,0.27428144,0.31762776,512,488,board_images_png,0.7626483,0.7626483 +st1657.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1369.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 
+st1096.png,knot,0.14781785,0.21973783,0.87173665,0.9797294,512,488,board_images_png,0.9355302,0.82652843 +st1096.png,knot,0.016842283,0.088842124,0.21323693,0.2722416,512,488,board_images_png,0.82652843,0.82652843 +st1462.png,knot,0.37319168,0.4497309,0.43395936,0.53268087,512,488,board_images_png,0.88651085,0.88651085 +st1776.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1705.png,knot,0.3318905,0.3849011,0.6956777,0.76191366,512,488,board_images_png,0.96945816,0.96945816 +st1582.png,knot,0.896007,0.9702874,0.4574962,0.55282426,512,488,board_images_png,0.9555228,0.9555228 +st1584.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1119.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1121.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1470.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1056.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1281.png,knot,0.47483134,0.52562714,0.44982663,0.48879707,512,488,board_images_png,0.9294945,0.9294945 +st1033.png,knot,0.20481113,0.24327247,0.7391399,0.82345104,512,488,board_images_png,0.83181804,0.83181804 +st1394.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1039.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1611.png,knot,0.6326234,0.7054164,0.86741334,0.96444726,512,488,board_images_png,0.99616516,0.9843567 +st1611.png,knot,0.07399843,0.11282173,0.32572043,0.36819047,512,488,board_images_png,0.9843567,0.9843567 +st1815.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1343.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1639.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1194.png,knot,0.6489157,0.7092218,0.73913777,0.8004431,512,488,board_images_png,0.98375535,0.98375535 +st1773.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1381.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1180.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1564.png,defect,0.27094924,0.71608114,0.56096876,0.624119,512,488,board_images_png,0.86454993,0.7657204 +st1564.png,defect,0.30570227,0.38637656,0.23720618,0.5720114,512,488,board_images_png,0.7657204,0.7657204 +st1076.png,knot,0.1993315,0.2713721,0.61612856,0.67154235,512,488,board_images_png,0.98148555,0.98148555 +st1449.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1262.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1547.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1388.png,knot,0.77038956,0.8246681,0.2486437,0.3917786,512,488,board_images_png,0.8589068,0.7435218 +st1388.png,defect,0.028243389,0.3578855,0.5529958,0.61774576,512,488,board_images_png,0.7435218,0.7435218 +st1071.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1319.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1323.png,knot,0.27874127,0.33264512,0.78055125,0.82403004,512,488,board_images_png,0.9629542,0.9629542 +st1267.png,defect,0.005203705,0.18179724,0.5817473,0.62368834,512,488,board_images_png,0.95650816,0.8531658 +st1267.png,knot,0.76825047,0.8674093,0.46741802,0.57576823,512,488,board_images_png,0.8531658,0.8531658 +st1571.png,knot,0.18502116,0.23628128,0.82359684,0.8792989,512,488,board_images_png,0.9745147,0.9745147 +st1625.png,knot,0.8698924,0.9376181,0.32788292,0.40407893,512,488,board_images_png,0.9801192,0.8253587 +st1625.png,knot,0.12062892,0.21728584,0.50254875,0.6091943,512,488,board_images_png,0.9581743,0.8253587 +st1625.png,knot,0.8375989,0.9150144,0.8612594,0.95407206,512,488,board_images_png,0.9176986,0.8253587 +st1625.png,defect,0.94997066,1.0,0.30075625,0.34504828,512,488,board_images_png,0.8253587,0.8253587 
+st1092.png,defect,0.0,0.5400781,0.5343204,0.60199744,512,488,board_images_png,0.8241516,0.78555936 +st1092.png,defect,0.2487825,0.9571989,0.5449516,0.6100018,512,488,board_images_png,0.78555936,0.78555936 +st1229.png,knot,0.005342853,0.04803483,0.59346676,0.6700139,512,488,board_images_png,0.7080499,0.7080499 +st1023.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1840.png,knot,0.12036637,0.18497443,0.7618415,0.8283344,512,488,board_images_png,0.986,0.986 +st1840.png,knot,0.7297609,0.7755673,0.62443626,0.6670296,512,488,board_images_png,0.986,0.986 +st1840.png,defect,0.76513,0.9952971,0.6075407,0.6546806,512,488,board_images_png,0.986,0.986 +st1808.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1491.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1021.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1421.png,knot,0.11980621,0.18726023,0.7555569,0.8244165,512,488,board_images_png,0.958447,0.958447 +st1339.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1554.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1481.png,knot,0.53876394,0.590091,0.36425796,0.41175127,512,488,board_images_png,0.96814036,0.96814036 +st1377.png,knot,0.05203633,0.09342611,0.8601645,0.9058688,512,488,board_images_png,0.9739779,0.9739779 +st1128.png,knot,0.80625194,0.91632825,0.55765307,0.7005993,512,488,board_images_png,0.9684247,0.8107647 +st1128.png,knot,0.32348785,0.36260453,0.6365056,0.67154855,512,488,board_images_png,0.9590423,0.8107647 +st1128.png,knot,0.072546415,0.11205525,0.3796195,0.42445257,512,488,board_images_png,0.95505,0.8107647 +st1128.png,defect,0.020768221,0.4429854,0.49122843,0.56238526,512,488,board_images_png,0.9002104,0.8107647 +st1128.png,defect,0.17511989,0.46910113,0.49637127,0.5440628,512,488,board_images_png,0.8107647,0.8107647 +st1127.png,knot,0.20359582,0.25129482,0.5636938,0.6114353,512,488,board_images_png,0.9720497,0.71256244 +st1127.png,knot,0.17509237,0.2645835,0.7619097,0.89300954,512,488,board_images_png,0.96749485,0.71256244 +st1127.png,knot,0.1909762,0.23535307,0.45002174,0.50459236,512,488,board_images_png,0.93286186,0.71256244 +st1127.png,knot,0.6537879,0.78765035,0.39515936,0.57093585,512,488,board_images_png,0.8413151,0.71256244 +st1127.png,knot,0.123352595,0.27046844,0.71307296,0.88086075,512,488,board_images_png,0.71256244,0.71256244 +st1017.png,knot,0.15934287,0.21258824,0.69495475,0.74923414,512,488,board_images_png,0.9906481,0.9513629 +st1017.png,knot,0.10266534,0.15678032,0.30125916,0.3437191,512,488,board_images_png,0.96538395,0.9513629 +st1017.png,knot,0.89806724,0.94799244,0.6149101,0.6703377,512,488,board_images_png,0.9513629,0.9513629 +st1649.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1824.png,knot,0.16498344,0.23226959,0.7412715,0.8212256,512,488,board_images_png,0.9905476,0.9675445 +st1824.png,knot,0.9254969,0.9730143,0.46670482,0.51139265,512,488,board_images_png,0.9675445,0.9675445 +st1789.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1063.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1445.png,knot,0.6443819,0.6869104,0.7897866,0.8300049,512,488,board_images_png,0.9764658,0.9764658 +st1519.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1539.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1441.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1155.png,knot,0.056128483,0.12090374,0.67237306,0.71741337,512,488,board_images_png,0.98089087,0.98089087 +st1179.png,knot,0.45837882,0.5055271,0.91032165,0.9597935,512,488,board_images_png,0.97704417,0.97704417 
+st1015.png,knot,0.4657949,0.5192351,0.77765274,0.83437693,512,488,board_images_png,0.99483126,0.9771674 +st1015.png,knot,0.62487704,0.6703066,0.582573,0.6338162,512,488,board_images_png,0.9771674,0.9771674 +st1315.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1065.png,knot,0.17431675,0.22347072,0.23654391,0.2848744,512,488,board_images_png,0.9868074,0.7672756 +st1065.png,knot,0.761391,0.81125563,0.7722216,0.81279993,512,488,board_images_png,0.9718078,0.7672756 +st1065.png,knot,0.5324617,0.5758695,0.522541,0.5539788,512,488,board_images_png,0.7672756,0.7672756 +st1357.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1667.png,knot,0.90539,1.0,0.75757515,0.95417035,512,488,board_images_png,0.9598757,0.9598757 +st1671.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1516.png,knot,0.52866757,0.64624226,0.5859474,0.7330528,512,488,board_images_png,0.87794745,0.87794745 +st1101.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1260.png,knot,0.008780143,0.06881653,0.55893314,0.6122557,512,488,board_images_png,0.96111435,0.96111435 +st1351.png,knot,0.37872162,0.43863806,0.55814743,0.6412034,512,488,board_images_png,0.9947301,0.97738373 +st1351.png,knot,0.7162861,0.782209,0.2965802,0.374671,512,488,board_images_png,0.9933106,0.97738373 +st1351.png,knot,0.82775426,0.8713944,0.5412964,0.5987785,512,488,board_images_png,0.97738373,0.97738373 +st1376.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1186.png,knot,0.6058496,0.65804994,0.75309813,0.8104734,512,488,board_images_png,0.98136395,0.98136395 +st1538.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1287.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1053.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1681.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1190.png,defect,0.035275936,0.60065794,0.54249704,0.60351425,512,488,board_images_png,0.8525961,0.76389796 +st1190.png,defect,0.48787197,0.97275436,0.52801114,0.5940613,512,488,board_images_png,0.7868591,0.76389796 +st1190.png,defect,0.018175239,0.38031426,0.5527138,0.6027485,512,488,board_images_png,0.76389796,0.76389796 +st1628.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1522.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1270.png,knot,0.34678867,0.4117869,0.2727826,0.34167832,512,488,board_images_png,0.9446557,0.9446557 +st1659.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1534.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1029.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1594.png,knot,0.33551028,0.38614926,0.16877243,0.21096465,512,488,board_images_png,0.987112,0.8777176 +st1594.png,defect,0.6975558,0.9963596,0.5279547,0.57945627,512,488,board_images_png,0.94186485,0.8777176 +st1594.png,defect,0.064073026,0.73349494,0.5247153,0.58300364,512,488,board_images_png,0.8777176,0.8777176 +st1257.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1585.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1805.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1263.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1265.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1763.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1458.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1530.png,knot,0.53668815,0.5918307,0.8938515,0.94775677,512,488,board_images_png,0.9752803,0.9752803 +st1605.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1604.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1483.png,defect,0.774059,0.98008376,0.43718264,0.4840865,512,488,board_images_png,0.7574974,0.7574974 +st1302.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 
+st1579.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1694.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1570.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1711.png,knot,0.5873118,0.6285114,0.37589058,0.43903938,512,488,board_images_png,0.9746859,0.9138147 +st1711.png,knot,0.57629526,0.61687636,0.7231382,0.76612574,512,488,board_images_png,0.9697566,0.9138147 +st1711.png,defect,0.32764342,0.5752018,0.54542994,0.5934744,512,488,board_images_png,0.9138147,0.9138147 +st1140.png,knot,0.8942638,0.9481075,0.5630682,0.6189817,512,488,board_images_png,0.97847044,0.97847044 +st1781.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1348.png,knot,0.0,0.031571154,0.6337116,0.6920282,512,488,board_images_png,0.9581143,0.89654917 +st1348.png,knot,0.6687841,0.7174068,0.43589646,0.47504494,512,488,board_images_png,0.89654917,0.89654917 +st1615.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1170.png,knot,0.4402049,0.49130502,0.8758248,0.931154,512,488,board_images_png,0.984606,0.9822368 +st1170.png,knot,0.81391335,0.86345357,0.76427454,0.82079476,512,488,board_images_png,0.9822368,0.9822368 +st1690.png,knot,0.19851859,0.24199493,0.3909187,0.42710394,512,488,board_images_png,0.9121909,0.9121909 +st1792.png,knot,0.23595089,0.29004455,0.2956416,0.3383845,512,488,board_images_png,0.9906427,0.910417 +st1792.png,knot,0.8010435,0.8440271,0.8672871,0.9091894,512,488,board_images_png,0.910417,0.910417 +st1803.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1569.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1551.png,knot,0.63619053,0.7649639,0.40705118,0.5653195,512,488,board_images_png,0.99102426,0.98297393 +st1551.png,knot,0.17823221,0.31843635,0.6092808,0.78658724,512,488,board_images_png,0.98297393,0.98297393 +st1660.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1307.png,knot,0.19044122,0.22998337,0.2392788,0.29569614,512,488,board_images_png,0.9176673,0.75701463 +st1307.png,knot,0.4755669,0.5202824,0.24868643,0.29236877,512,488,board_images_png,0.8936444,0.75701463 +st1307.png,knot,0.756844,0.7962363,0.23921326,0.28953588,512,488,board_images_png,0.75701463,0.75701463 +st1330.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1749.png,knot,0.29975393,0.38158545,0.47838268,0.55155176,512,488,board_images_png,0.98031086,0.98031086 +st1521.png,knot,0.46013328,0.5463568,0.4740795,0.5873207,512,488,board_images_png,0.9323549,0.9323549 +st1672.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1809.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1104.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1586.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1533.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1727.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1762.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1561.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1220.png,knot,0.12324664,0.16531894,0.88222283,0.9185721,512,488,board_images_png,0.7501036,0.7501036 +st1849.png,knot,0.8353676,0.9049195,0.9066991,0.9460937,512,488,board_images_png,0.88539225,0.88539225 +st1064.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1304.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1136.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1842.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1216.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1478.png,knot,0.3499687,0.4842524,0.57836384,0.7447651,512,488,board_images_png,0.9697584,0.9697584 +st1772.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 
+st1142.png,knot,0.10130471,0.1666631,0.93701386,1.0,512,488,board_images_png,0.99122393,0.96465534 +st1142.png,knot,0.42964286,0.48378333,0.73776567,0.79399115,512,488,board_images_png,0.9888548,0.96465534 +st1142.png,knot,0.0,0.025988707,0.42292097,0.4892505,512,488,board_images_png,0.96465534,0.96465534 +st1070.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1683.png,defect,0.5329239,0.97495586,0.5084751,0.5724312,512,488,board_images_png,0.9590508,0.7539403 +st1683.png,defect,0.030474966,0.38542023,0.5292042,0.5737903,512,488,board_images_png,0.9506095,0.7539403 +st1683.png,knot,0.51778054,0.56286615,0.5672909,0.64855087,512,488,board_images_png,0.9187184,0.7539403 +st1683.png,knot,0.78114593,0.82849365,0.9175072,0.9523704,512,488,board_images_png,0.877884,0.7539403 +st1683.png,defect,0.56203073,0.6564255,0.18660906,0.4561023,512,488,board_images_png,0.7758399,0.7539403 +st1683.png,defect,0.5718204,0.9096621,0.51635915,0.56507355,512,488,board_images_png,0.7539403,0.7539403 +st1423.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1787.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1692.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1848.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1055.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1026.png,knot,0.2674017,0.35383838,0.39859554,0.50976944,512,488,board_images_png,0.9884343,0.96366304 +st1026.png,knot,0.69417506,0.744075,0.34379873,0.39051458,512,488,board_images_png,0.97863936,0.96366304 +st1026.png,defect,0.70078284,0.9907891,0.5857268,0.6470487,512,488,board_images_png,0.96366304,0.96366304 +st1826.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1047.png,knot,0.3542995,0.45425007,0.5452347,0.67343616,512,488,board_images_png,0.98686105,0.85297066 +st1047.png,defect,0.12903255,0.31020302,0.31878722,0.3649246,512,488,board_images_png,0.85297066,0.85297066 +st1751.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1786.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1069.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1037.png,knot,0.6725518,0.7388108,0.6160668,0.68687695,512,488,board_images_png,0.9883166,0.71845925 +st1037.png,knot,0.15919425,0.23021916,0.60911787,0.681755,512,488,board_images_png,0.9800915,0.71845925 +st1037.png,defect,0.005260315,0.34612125,0.51574904,0.5804733,512,488,board_images_png,0.79289407,0.71845925 +st1037.png,defect,0.018672304,0.322812,0.5305951,0.57148284,512,488,board_images_png,0.71845925,0.71845925 +st1821.png,knot,0.11944925,0.19018118,0.9087665,0.9943758,512,488,board_images_png,0.99410444,0.9670556 +st1821.png,knot,0.59818923,0.65603995,0.6801038,0.7442376,512,488,board_images_png,0.9670556,0.9670556 +st1364.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1192.png,knot,0.39846605,0.45543727,0.36765742,0.4488806,512,488,board_images_png,0.99612194,0.7127546 +st1192.png,defect,0.07790943,0.44866413,0.5975798,0.640683,512,488,board_images_png,0.80447847,0.7127546 +st1192.png,defect,0.47953823,0.7499259,0.5517361,0.59940904,512,488,board_images_png,0.7127546,0.7127546 +st1602.png,knot,0.9574499,1.0,0.42608044,0.4791277,512,488,board_images_png,0.72733927,0.72733927 +st1379.png,knot,0.29298508,0.43081832,0.5901556,0.77116805,512,488,board_images_png,0.9793032,0.9793032 +st1440.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1719.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1716.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1108.png,knot,0.69631505,0.73806465,0.6270994,0.6722544,512,488,board_images_png,0.9886671,0.90558296 
+st1108.png,knot,0.6181019,0.69406986,0.23797426,0.29145443,512,488,board_images_png,0.9754843,0.90558296 +st1108.png,knot,0.12284262,0.15919161,0.6235059,0.686582,512,488,board_images_png,0.9687026,0.90558296 +st1108.png,knot,0.29641727,0.3453667,0.5215502,0.5541794,512,488,board_images_png,0.90558296,0.90558296 +st1798.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1596.png,knot,0.7262323,0.7746997,0.23982525,0.2841002,512,488,board_images_png,0.9764713,0.8792314 +st1596.png,defect,0.0,0.18872425,0.6252165,0.6738723,512,488,board_images_png,0.8792314,0.8792314 +st1107.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1593.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1740.png,knot,0.2112985,0.2654443,0.6285077,0.6783256,512,488,board_images_png,0.97739637,0.97739637 +st1463.png,knot,0.46628618,0.5137217,0.5166115,0.5817609,512,488,board_images_png,0.90653586,0.90653586 +st1249.png,knot,0.67133003,0.7416619,0.56961924,0.63172895,512,488,board_images_png,0.7778287,0.7778287 +st1292.png,knot,0.20523047,0.32447684,0.19784732,0.3516566,512,488,board_images_png,0.98957986,0.9779925 +st1292.png,knot,0.76388824,0.861159,0.8165224,0.9411841,512,488,board_images_png,0.9779925,0.9779925 +st1232.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1734.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1332.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1799.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1696.png,knot,0.5082208,0.55826336,0.40370333,0.4456584,512,488,board_images_png,0.9750294,0.9750294 +st1353.png,knot,0.20250683,0.26231235,0.70863116,0.778109,512,488,board_images_png,0.99471915,0.87796557 +st1353.png,knot,0.9440131,0.9763329,0.7736546,0.83807546,512,488,board_images_png,0.87796557,0.87796557 +st1042.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1088.png,knot,0.63853097,0.70713216,0.5170407,0.5931604,512,488,board_images_png,0.99378115,0.97339106 +st1088.png,knot,0.016109794,0.12432815,0.6511545,0.8050783,512,488,board_images_png,0.97339106,0.97339106 +st1764.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1706.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1555.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1157.png,knot,0.55773807,0.6353789,0.796305,0.90967226,512,488,board_images_png,0.98938215,0.97597164 +st1157.png,knot,0.6030029,0.70828706,0.46024242,0.56399477,512,488,board_images_png,0.97597164,0.97597164 +st1310.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1331.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1137.png,knot,0.5549138,0.655619,0.6309172,0.75291824,512,488,board_images_png,0.98047423,0.9802043 +st1137.png,knot,0.6839061,0.796325,0.28546476,0.4368632,512,488,board_images_png,0.9802043,0.9802043 +st1311.png,knot,0.17855604,0.22768492,0.59137386,0.6507016,512,488,board_images_png,0.96903574,0.96903574 +st1466.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1051.png,knot,0.7411262,0.8467681,0.33830678,0.44452292,512,488,board_images_png,0.9951338,0.74332273 +st1051.png,knot,0.5914114,0.62914854,0.30536783,0.32768086,512,488,board_images_png,0.74332273,0.74332273 +st1830.png,knot,0.2591614,0.3020172,0.30837727,0.3550865,512,488,board_images_png,0.9754875,0.96484137 +st1830.png,knot,0.88722545,0.9334164,0.37343535,0.4335522,512,488,board_images_png,0.96484137,0.96484137 +st1040.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1556.png,knot,0.5110332,0.56798416,0.5350004,0.59305537,512,488,board_images_png,0.9750122,0.7373006 
+st1556.png,defect,0.12589139,0.23129989,0.5029541,0.5395938,512,488,board_images_png,0.7373006,0.7373006 +st1702.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1038.png,knot,0.31787464,0.36652422,0.33077985,0.3723501,512,488,board_images_png,0.97705007,0.97705007 +st1335.png,NULL,0,0,0,0,512,488,board_images_png,0,0.0 +st1202.png,knot,0.7601946,0.8447372,0.46703416,0.56048256,512,488,board_images_png,0.94609445,0.94609445 +st1518.png,knot,0.56253266,0.6819586,0.4816731,0.61785614,512,488,board_images_png,0.97566086,0.87584656 +st1518.png,knot,0.106342345,0.17423195,0.41895986,0.48053488,512,488,board_images_png,0.8768969,0.87584656 +st1518.png,knot,0.7848372,0.84227246,0.61229557,0.64354527,512,488,board_images_png,0.87584656,0.87584656 diff --git a/test/totag_source2.csv b/test/totag_source2.csv new file mode 100644 index 00000000..afa27457 --- /dev/null +++ b/test/totag_source2.csv @@ -0,0 +1,11 @@ +filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence +st1026.png,knot,0.69150746,0.7407868,0.3375946,0.39474854,512,488,board_images_png,0.9990602,0.54169416 +st1026.png,knot,0.29255274,0.37531677,0.41773036,0.48604906,512,488,board_images_png,0.74185294,0.54169416 +st1026.png,knot,0.29603952,0.35703427,0.40142354,0.49790853,512,488,board_images_png,0.54169416,0.54169416 +st1578.png,knot,0.54391885,0.60184073,0.7846939,0.85633487,512,488,board_images_png,0.9994636,0.9942725 +st1578.png,knot,0.60079277,0.6762777,0.36906424,0.4369791,512,488,board_images_png,0.9942725,0.9942725 +st1611.png,knot,0.65116334,0.7139255,0.86043906,0.9666604,512,488,board_images_png,0.99822897,0.9488958 +st1611.png,knot,0.07768918,0.1141083,0.332125,0.36988598,512,488,board_images_png,0.9488958,0.9488958 +st1840.png,knot,0.12473148,0.18879795,0.76679623,0.8271259,512,488,board_images_png,0.99770314,0.5600077 +st1840.png,knot,0.72636276,0.7735574,0.62331045,0.66846347,512,488,board_images_png,0.98784816,0.5600077 +st1840.png,knot,0.59846735,0.6426683,0.2617435,0.32195628,512,488,board_images_png,0.5600077,0.5600077 \ No newline at end of file diff --git a/test/untagged_cow.csv b/test/untagged_cow.csv new file mode 100644 index 00000000..6d2178cf --- /dev/null +++ b/test/untagged_cow.csv @@ -0,0 +1,15 @@ +filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence +IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,1,0.0,0.31701845,0.0077313487,0.55989933,480,600,camera_images,0.99540824,0.7061533 +IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,1,0.25898764,0.5642854,0.104834646,0.53862405,480,600,camera_images,0.9025605,0.7061533 +IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,35,0.18555662,0.5546974,0.48929358,0.5928695,480,600,camera_images,0.7061533,0.7061533 +IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG,19,0.08357386,0.3895763,0.29830444,0.6114975,480,600,camera_images,0.90615094,0.90615094 +IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG,NULL,0,0,0,0,480,600,camera_images,0,0.0 +IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG,16,0.48581073,0.9471678,0.46597427,0.9487598,480,600,camera_images,0.6689594,0.6689594 +IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG,1,0.2735326,0.44143108,0.14391276,0.59441584,480,600,camera_images,0.9997955,0.9007767 +IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG,27,0.25880748,0.32737294,0.2144454,0.40248972,480,600,camera_images,0.9007767,0.9007767 
+IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG,16,0.0,0.7922343,0.2661451,0.80265963,480,600,camera_images,0.849446,0.7052012 +IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG,22,0.044430044,0.7727591,0.2618165,0.8102997,480,600,camera_images,0.7052012,0.7052012 +IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG,NULL,0,0,0,0,480,640,camera_images,0,0.0 +IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG,19,0.02553936,0.47359988,0.02941447,0.93185467,480,853,camera_images,0.9975829,0.9975829 +IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG,19,0.45203093,0.75573874,0.061686885,0.681505,480,853,camera_images,0.9460018,0.9460018 +IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG,18,0.5594412,0.8323967,0.2793807,0.6407886,480,853,camera_images,0.7723789,0.7723789 diff --git a/test/untagged_source.csv b/test/untagged_source.csv new file mode 100644 index 00000000..a7043238 --- /dev/null +++ b/test/untagged_source.csv @@ -0,0 +1,9 @@ +filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence +st1026.png,knot,0.69150746,0.7407868,0.3375946,0.39474854,512,488,board_images_png,0.9990602,0.54169416 +st1026.png,knot,0.29255274,0.37531677,0.41773036,0.48604906,512,488,board_images_png,0.74185294,0.54169416 +st1026.png,knot,0.29603952,0.35703427,0.40142354,0.49790853,512,488,board_images_png,0.54169416,0.54169416 +st1194.png,knot,0.6518282,0.70353997,0.7374667,0.80387944,512,488,board_images_png,0.99921286,0.99921286 +st1578.png,knot,0.54391885,0.60184073,0.7846939,0.85633487,512,488,board_images_png,0.9994636,0.9942725 +st1578.png,knot,0.60079277,0.6762777,0.36906424,0.4369791,512,488,board_images_png,0.9942725,0.9942725 +st1611.png,knot,0.65116334,0.7139255,0.86043906,0.9666604,512,488,board_images_png,0.99822897,0.9488958 +st1611.png,knot,0.07768918,0.1141083,0.332125,0.36988598,512,488,board_images_png,0.9488958,0.9488958 diff --git a/train/__init__.py b/train/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/train/active_learning_init_pred.sh b/train/active_learning_init_pred.sh new file mode 100644 index 00000000..fad1cfea --- /dev/null +++ b/train/active_learning_init_pred.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Source environmental variables +set -a +sed -i 's/\r//g' $1 +. $1 +set +a +# Updating vars in config file +envsubst < $1 > cur_config.ini +# Update images from blob storage +echo "Updating Blob Folder" +python ${python_file_directory}/update_blob_folder.py cur_config.ini +# Create TFRecord from images + csv file on blob storage +echo "Download MS COCO tf model if it doesn't exist" +# Download tf model if it doesn't exist +if [ ! 
-d "$download_location/${init_model_name}" ]; then + mkdir -p $download_location + curl $init_pred_tf_url --create-dirs -o ${download_location}/${init_model_name}.tar.gz + tar -xzf ${download_location}/${init_model_name}.tar.gz -C $download_location +fi + + +echo "Running pretratined model on the images" +python ${python_file_directory}/create_predictions.py cur_config.ini init_pred $init_model_graph +# Rename predictions and inference graph based on timestamp and upload +echo "Uploading new data" + +az storage blob upload --container-name $label_container_name --file $untagged_output --name init_totag_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY \ No newline at end of file diff --git a/train/active_learning_train.sh b/train/active_learning_train.sh index d57dc5b2..24b276ba 100755 --- a/train/active_learning_train.sh +++ b/train/active_learning_train.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Fail on first error +set -e # Source environmental variables set -a sed -i 's/\r//g' $1 diff --git a/train/create_predictions.py b/train/create_predictions.py index 85144d96..c5089595 100644 --- a/train/create_predictions.py +++ b/train/create_predictions.py @@ -10,6 +10,20 @@ NUM_CHANNELS=3 FOLDER_LOCATION=8 +PREDICTIONS_SCHEMA = \ + ["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"] +PREDICTIONS_SCHEMA_NO_FOLDER =\ + ["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"] + +#name,prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX],prediction[YMIN_IDX],prediction[YMAX_IDX],height,width,folder,prediction[BOX_CONFID_IDX], confidence +BOX_CONFID_IDX = 0 +CLASS_IDX = 1 +XMIN_IDX = 3 +XMAX_IDX = 5 +YMIN_IDX = 2 +YMAX_IDX = 4 + + def calculate_confidence(predictions): return min([float(prediction[0]) for prediction in predictions]) @@ -23,25 +37,34 @@ def make_csv_output(all_predictions: List[List[List[int]]], all_names: List[str] tagged_writer = csv.writer(tagged_file) untagged_writer = csv.writer(untagged_file) if user_folders: - tagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"]) - untagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"]) + tagged_writer.writerow(PREDICTIONS_SCHEMA) + untagged_writer.writerow(PREDICTIONS_SCHEMA) else: - tagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"]) - untagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"]) + tagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) + untagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) if user_folders: for (folder, name), predictions, (height, width) in zip(all_names, all_predictions, all_sizes): if not predictions: predictions = [[0,"NULL",0,0,0,0]] confidence = calculate_confidence(predictions) for prediction in predictions: - (tagged_writer if name in file_set[folder] else untagged_writer).writerow([name,prediction[1],prediction[3],prediction[5],prediction[2],prediction[4],height,width,folder,prediction[0], confidence]) + (tagged_writer if name in file_set[folder] else untagged_writer).writerow([ + name, + prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX], + prediction[YMIN_IDX],prediction[YMAX_IDX],height,width, + folder, + 
prediction[BOX_CONFID_IDX], confidence]) else: for name, predictions, (height,width) in zip(all_names, all_predictions, all_sizes): if not predictions: predictions = [[0,"NULL",0,0,0,0]] confidence = calculate_confidence(predictions) for prediction in predictions: - (tagged_writer if name in file_set else untagged_writer).writerow([name,prediction[1],prediction[3],prediction[5],prediction[2],prediction[4],height,width,prediction[0], confidence]) + (tagged_writer if name in file_set else untagged_writer).writerow([ + name, + prediction[CLASS_IDX], prediction[XMIN_IDX], prediction[XMAX_IDX], + prediction[YMIN_IDX], prediction[YMAX_IDX], height, width, + prediction[BOX_CONFID_IDX], confidence]) def get_suggestions(detector, basedir: str, untagged_output: str, tagged_output: str, cur_tagged: str, cur_tagging: str, min_confidence: float =.2, @@ -60,6 +83,7 @@ def get_suggestions(detector, basedir: str, untagged_output: str, basedir = Path(basedir) CV2_COLOR_LOAD_FLAG = 1 all_predictions = [] + all_tagged = [] if user_folders: # TODO: Cross reference with ToTag # download latest tagging and tagged @@ -77,11 +101,13 @@ def get_suggestions(detector, basedir: str, untagged_output: str, for row in all_tagged: already_tagged[row[FOLDER_LOCATION]].add(row[0]) subdirs = [subfile for subfile in basedir.iterdir() if subfile.is_dir()] + print("subdirs: ", subdirs) all_names = [] all_image_files = [] all_sizes = [] for subdir in subdirs: cur_image_names = list(subdir.rglob(filetype)) + print("Total image names: ", len(cur_image_names)) all_image_files += [str(image_name) for image_name in cur_image_names] foldername = subdir.stem all_names += [(foldername, filename.name) for filename in cur_image_names] @@ -90,6 +116,7 @@ def get_suggestions(detector, basedir: str, untagged_output: str, all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) for curindex, image in enumerate(all_image_files): all_images[curindex] = cv2.resize(cv2.imread(image, CV2_COLOR_LOAD_FLAG), image_size) + print("Shape of all_images: ", all_images.shape) all_predictions = detector.predict(all_images, min_confidence=min_confidence) else: with open(cur_tagged, 'r') as file: @@ -106,7 +133,7 @@ def get_suggestions(detector, basedir: str, untagged_output: str, all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) for curindex, image in enumerate(all_image_files): all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) - all_predictions = detector.predict(all_images, min_confidence=min_confidence) + all_predictions = detector.predict(all_images, batch_size=2, min_confidence=min_confidence) make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, user_folders) if __name__ == "__main__": @@ -130,13 +157,25 @@ def get_suggestions(detector, basedir: str, untagged_output: str, container_name = config_file["label_container_name"] file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)] cur_tagged = None - if file_date: - block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagged.csv") - cur_tagged = "tagged.csv" - file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] cur_tagging = None - if file_date: - 
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagging.csv") - cur_tagging = "tagging.csv" - cur_detector = TFDetector(config_file["classes"].split(","), str(Path(config_file["inference_output_dir"])/"frozen_inference_graph.pb")) + classes = [] + model = None + if len(sys.argv) > 3 and (sys.argv[2].lower() =='init_pred'): + print("Using MS COCO pretrained model to detect known 90 classes. For class id <-> name mapping check this file: https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_label_map.pbtxt") + model = sys.argv[3] + print("Using model: " + model) + classesIDs = list(range(1, 91)) + classes = [str(x) for x in classesIDs] + else: + classes = config_file["classes"].split(",") + model = str(Path(config_file["inference_output_dir"])/"frozen_inference_graph.pb") + if file_date: + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagged.csv") + cur_tagged = "tagged.csv" + file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] + if file_date: + block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagging.csv") + cur_tagging = "tagging.csv" + + cur_detector = TFDetector(classes, model) get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, filetype=config_file["filetype"], min_confidence=float(config_file["min_confidence"]), user_folders=config_file["user_folders"]=="True") diff --git a/train/repartition_test_set.py b/train/repartition_test_set.py index 1ee82464..0c2627c2 100644 --- a/train/repartition_test_set.py +++ b/train/repartition_test_set.py @@ -8,6 +8,8 @@ import sys import os +random.seed(42) + # Allow us to import utils config_dir = str(Path.cwd().parent / "utils") if config_dir not in sys.path: diff --git a/train/tf_detector.py b/train/tf_detector.py index 2e5a6449..e8013cfc 100644 --- a/train/tf_detector.py +++ b/train/tf_detector.py @@ -1,8 +1,7 @@ -import re - import numpy as np import tensorflow as tf + class TFDetector(): def __init__(self, classes, inference_graph="frozen_graph.pb"): @@ -33,8 +32,11 @@ def predict(self, images_data, batch_size=10, min_confidence=.7): image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') split_data = [images_data[i:i+batch_size] for i in range(0,images_data.shape[0],batch_size)] split_data = [sess.run(tensor_dict, feed_dict={image_tensor: batch}) for batch in split_data] - split_data = [np.dstack((batch['detection_scores'], self.label_arr[batch['detection_classes'].astype(np.uint8)], batch['detection_boxes'])) for batch in split_data] + split_data = [np.dstack((batch['detection_scores'], + self.label_arr[batch['detection_classes'].astype(np.uint8)], + batch['detection_boxes'])) for batch in split_data] combined = np.concatenate(split_data) non_zero = combined[:,:,0].astype(np.float)>min_confidence return [sorted(cur_combined[cur_non_zero].tolist(), reverse=True) for cur_combined, cur_non_zero in zip(combined, non_zero)] - + + diff --git a/train_vnext/README.md b/train_vnext/README.md new file mode 100644 index 00000000..74464169 --- /dev/null +++ b/train_vnext/README.md @@ -0,0 +1,133 @@ +# Training vNext + +This document covers the most basic instructions to get a training session going on an in development version of the system that uses a new data management paradigm. 
+
+The instructions assume you are familiar with the original manual instructions [here](../README.md) and its woodblock dataset. We also assume you are working with a training environment (most likely SSHing into a deployed [Azure DSVM](../devops/dsvm/Readme.md)).
+
+Consider this to be a **pre-release** version with instructions that will evolve quickly as more automation gets built.
+
+## Set up legacy config.ini
++ Go to the [config.ini](../config.ini) in your training environment
++ Update the values for the following keys:
+  + python_file_directory
+  + data_dir
+  + train_dir
+  + inference_output_dir
+  + tf_models_location
+  + download_location
+  + user_folders (**Must be set to false for the moment**)
+  + classes
+  + filetype
+
+  For instance, on an Azure DSVM the values could be:
+
+```
+python_file_directory=/data/home/abrig/repos/models/research/active-learning-detect/train_vnext
+
+data_dir=/data/home/abrig/actlrn-data
+
+train_dir=/data/home/abrig/actlrn-data/training
+
+inference_output_dir=knots_inference_graphs
+
+tf_models_location=/data/home/abrig/repos/models/research
+
+download_location=/data/home/abrig/actlrn-downloads
+
+user_folders=False
+
+classes=knot,defect
+
+filetype=*.png
+```
+**Note**: The value for the config.ini key **_python_file_directory_** must be an absolute file path.
+
+## Set up CLI
+
+Follow the instructions [here](../cli/README.md) to make sure your CLI configuration file exists and is set up. Most importantly, you should have your CLI config path assigned to the ALCONFIG environment variable.
+
+
+## Set up data dependencies
+In a command window, navigate to the root of the repo (one level up from here).
+
+**Note:** If you have previously followed the original manual instructions [here](../README.md), your Active Learning repo will be within your Tensorflow repo (e.g. ~/repos/models/research/active-learning-detect/). The [DSVM deployment script](../devops/dsvm/Readme.md) will automate this setup for you.
+
+Run the following command:
+```
+python -m train_vnext.training start -c ~/repos/models/research/active-learning-detect/config.ini
+```
+
+This command will:
+- Verify the config.ini file
+- Download all the images
+- Create the human-annotated image labels as a CSV
+- Create a list of "in progress" images as a CSV
+- Create the PASCAL VOC label map file based on the classification name values ("classes" key) in config.ini
+
+**Note:** You may receive errors about missing configuration keys if your config.ini is not set up correctly.
+
+## Execute training
+Next, navigate to the _train_vnext_ directory and run the command:
+
+```
+sh active_learning_train.sh ~/repos/models/research/active-learning-detect/config.ini
+```
+
+This process can take 10 minutes or longer to execute. 
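+
+Since a full run takes a while, it can be worth sanity-checking the label CSV generated by the `start` step before launching (or re-launching) training. The snippet below is only a rough sketch: the CSV path is a hypothetical example, so point `TAGGED_CSV` at whatever your config.ini `tagged_output` key resolves to.
+
+```
+import csv
+from collections import Counter
+
+# Hypothetical path -- use the tagged_output value from your config.ini
+TAGGED_CSV = "/data/home/abrig/actlrn-data/tagged.csv"
+
+with open(TAGGED_CSV) as f:
+    reader = csv.DictReader(f)
+    # One row per human-labelled bounding box; count boxes per class
+    class_counts = Counter(row["class"] for row in reader)
+
+print(class_counts)
+```
+
+If a class listed under the "classes" key is missing or has very few boxes, the trained model will likely perform poorly on it.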
+
+## Save training session
+Navigate back to the root of your Active Learning repo and execute:
+
+```
+python -m train_vnext.training save -c ~/repos/models/research/active-learning-detect/config.ini
+```
+
+This command will:
+* Persist the training session's inference graph in cloud storage
+* Log a new training session to the database
+* Log all performance and prediction data from the training session to the database
+
+
+## Review training performance
+To quickly review training and class performance over time, use the following queries directly against your [deployed](../db/Readme.md) database:
+
+
+Class performance over time
+```
+SELECT
+    c.classificationname,
+    p.avgperf,
+    t.classperfavg
+FROM training_info t
+join class_performance p on t.trainingid = p.trainingid
+join classification_info c on c.classificationid = p.classificationid
+order by t.createddtim desc;
+```
+
+Least confident classifications
+```
+SELECT
+    p.imageid,
+    c.classificationname,
+    p.boxconfidence,
+    p.imageconfidence
+FROM Prediction_Labels p
+join classification_info c on c.classificationid = p.classificationid
+where trainingid = (SELECT MAX(trainingid) FROM training_info)
+order by boxconfidence ASC;
+```
+
+Most confident classifications
+```
+SELECT
+    p.imageid,
+    c.classificationname,
+    p.boxconfidence,
+    p.imageconfidence
+FROM Prediction_Labels p
+join classification_info c on c.classificationid = p.classificationid
+where trainingid = (SELECT MAX(trainingid) FROM training_info)
+order by boxconfidence DESC;
+```
+
+
diff --git a/train_vnext/__init__.py b/train_vnext/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/train_vnext/active_learning_train.sh b/train_vnext/active_learning_train.sh
new file mode 100644
index 00000000..7c334223
--- /dev/null
+++ b/train_vnext/active_learning_train.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#TODO: We would like to fail on first error but it kills the SSH connection. Use "sh filename.sh" from bash
+set -e
+# Source environmental variables
+set -a
+sed -i 's/\r//g' $1
+. $1
+set +a
+# Updating vars in config file
+envsubst < $1 > cur_config.ini
+
+# Create TFRecord from images + csv file on blob storage
+echo "Creating TF Record"
+python ${python_file_directory}/create_tf_record.py cur_config.ini
+
+# Download tf model if it doesn't exist
+if [ ! -d "$download_location/${model_name}" ]; then
+	mkdir -p $download_location
+	curl $tf_url --create-dirs -o ${download_location}/${model_name}.tar.gz
+	tar -xzf ${download_location}/${model_name}.tar.gz -C $download_location
+fi
+if [ ! -z "$optional_pipeline_url" ]; then
+	curl $optional_pipeline_url -o $pipeline_file
+elif [ ! 
-f $pipeline_file ]; then + cp ${download_location}/${model_name}/pipeline.config $pipeline_file +fi + +echo "Making pipeline file from env vars" +temp_pipeline=${pipeline_file%.*}_temp.${pipeline_file##*.} +sed "s/${old_label_path//\//\\/}/${label_map_path//\//\\/}/g" $pipeline_file > $temp_pipeline +sed -i "s/${old_train_path//\//\\/}/${tf_train_record//\//\\/}/g" $temp_pipeline +sed -i "s/${old_val_path//\//\\/}/${tf_val_record//\//\\/}/g" $temp_pipeline +sed -i "s/${old_checkpoint_path//\//\\/}/${fine_tune_checkpoint//\//\\/}/g" $temp_pipeline +sed -i "s/$num_steps_marker[[:space:]]*[[:digit:]]*/$num_steps_marker $train_iterations/g" $temp_pipeline +sed -i "s/$num_examples_marker[[:space:]]*[[:digit:]]*/$num_examples_marker $eval_iterations/g" $temp_pipeline +sed -i "s/$num_classes_marker[[:space:]]*[[:digit:]]*/$num_classes_marker $num_classes/g" $temp_pipeline + +echo "" +echo "Pipeline file written to $temp_pipeline" +echo "" + +# Train model on TFRecord +echo "Training model" +rm -rf $train_dir +python ${tf_location_legacy}/train.py --train_dir=$train_dir --pipeline_config_path=$temp_pipeline --logtostderr + +# Export inference graph of model +echo "Exporting inference graph" +rm -rf $inference_output_dir +python ${tf_location}/export_inference_graph.py --input_type "image_tensor" --pipeline_config_path "$temp_pipeline" --trained_checkpoint_prefix "${train_dir}/model.ckpt-$train_iterations" --output_directory "$inference_output_dir" +# TODO: Validation on Model, keep track of MAP etc. +# Use inference graph to create predictions on untagged images +echo "Creating new predictions" +python ${python_file_directory}/create_predictions.py cur_config.ini +echo "Calculating performance" +python ${python_file_directory}/map_validation.py cur_config.ini +# Rename predictions and inference graph based on timestamp and upload + +# echo "Uploading new data" +# az storage blob upload --container-name $label_container_name --file ${inference_output_dir}/frozen_inference_graph.pb --name model_$(date +%s).pb --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY +# az storage blob upload --container-name $label_container_name --file $untagged_output --name totag_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY +# az storage blob upload --container-name $label_container_name --file $validation_output --name performance_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY diff --git a/train_vnext/create_predictions.py b/train_vnext/create_predictions.py new file mode 100644 index 00000000..7b2344b4 --- /dev/null +++ b/train_vnext/create_predictions.py @@ -0,0 +1,185 @@ +from functools import reduce +from pathlib import Path +from typing import List, Tuple, Dict, AbstractSet +import json +import cv2 +import csv +from collections import defaultdict +import numpy as np +import logging + +NUM_CHANNELS=3 +FOLDER_LOCATION=8 + +PREDICTIONS_SCHEMA = \ + ["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"] +PREDICTIONS_SCHEMA_NO_FOLDER =\ + ["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"] + +#name,prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX],prediction[YMIN_IDX],prediction[YMAX_IDX],height,width,folder,prediction[BOX_CONFID_IDX], confidence +BOX_CONFID_IDX = 0 +CLASS_IDX = 1 +XMIN_IDX = 3 +XMAX_IDX = 5 +YMIN_IDX = 2 +YMAX_IDX = 4 + + +def calculate_confidence(predictions): + return 
min([float(prediction[0]) for prediction in predictions]) + +def make_csv_output(all_predictions: List[List[List[int]]], all_names: List[str], all_sizes: List[Tuple[int]], + untagged_output: str, tagged_output: str, file_set: AbstractSet, user_folders: bool = True): + ''' + Convert list of Detector class predictions as well as list of image sizes + into a dict matching the VOTT json format. + ''' + with open(tagged_output, 'w', newline='') as tagged_file, open(untagged_output, 'w', newline='') as untagged_file: + tagged_writer = csv.writer(tagged_file) + untagged_writer = csv.writer(untagged_file) + if user_folders: + tagged_writer.writerow(PREDICTIONS_SCHEMA) + untagged_writer.writerow(PREDICTIONS_SCHEMA) + else: + tagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) + untagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) + if user_folders: + for (folder, name), predictions, (height, width) in zip(all_names, all_predictions, all_sizes): + if not predictions: + predictions = [[0,"NULL",0,0,0,0]] + confidence = calculate_confidence(predictions) + for prediction in predictions: + (tagged_writer if name in file_set[folder] else untagged_writer).writerow([ + name, + prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX], + prediction[YMIN_IDX],prediction[YMAX_IDX],height,width, + folder, + prediction[BOX_CONFID_IDX], confidence]) + else: + for name, predictions, (height,width) in zip(all_names, all_predictions, all_sizes): + if not predictions: + predictions = [[0,"NULL",0,0,0,0]] + confidence = calculate_confidence(predictions) + for prediction in predictions: + (tagged_writer if name in file_set else untagged_writer).writerow([ + name, + prediction[CLASS_IDX], prediction[XMIN_IDX], prediction[XMAX_IDX], + prediction[YMIN_IDX], prediction[YMAX_IDX], height, width, + prediction[BOX_CONFID_IDX], confidence]) + +def get_suggestions(detector, basedir: str, untagged_output: str, + tagged_output: str, cur_tagged: str, cur_tagging: str, min_confidence: float =.2, + image_size: Tuple=(1000,750), filetype: str="*.jpg", minibatchsize: int=50, + user_folders: bool=True): + '''Gets suggestions from a given detector and uses them to generate VOTT tags + + Function inputs an instance of the Detector class along with a directory, + and optionally a confidence interval, image size, and tag information (name and color). + It returns a list of subfolders in that directory sorted by how confident the + given Detector was was in predicting bouding boxes on files within that subfolder. + It also generates VOTT JSON tags corresponding to the predicted bounding boxes. 
+ The optional confidence interval and image size correspond to the matching optional + arguments to the Detector class + ''' + basedir = Path(basedir) + CV2_COLOR_LOAD_FLAG = 1 + all_predictions = [] + if user_folders: + # TODO: Cross reference with ToTag + # download latest tagging and tagged + if cur_tagged is not None: + with open(cur_tagged, 'r') as file: + reader = csv.reader(file) + next(reader, None) + all_tagged = list(reader) + if cur_tagging is not None: + with open(cur_tagging, 'r') as file: + reader = csv.reader(file) + next(reader, None) + all_tagged.extend(list(reader)) + already_tagged = defaultdict(set) + for row in all_tagged: + already_tagged[row[FOLDER_LOCATION]].add(row[0]) + subdirs = [subfile for subfile in basedir.iterdir() if subfile.is_dir()] + all_names = [] + all_image_files = [] + all_sizes = [] + for subdir in subdirs: + cur_image_names = list(subdir.rglob(filetype)) + all_image_files += [str(image_name) for image_name in cur_image_names] + foldername = subdir.stem + all_names += [(foldername, filename.name) for filename in cur_image_names] + # Reversed because numpy is row-major + all_sizes = [cv2.imread(image, CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] + all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) + for curindex, image in enumerate(all_image_files): + all_images[curindex] = cv2.resize(cv2.imread(image, CV2_COLOR_LOAD_FLAG), image_size) + all_predictions = detector.predict(all_images, min_confidence=min_confidence) + else: + with open(cur_tagged, 'r') as file: + reader = csv.reader(file) + next(reader, None) + already_tagged = {row[0] for row in reader} + logging.info("\nFound {} rows in tagged data".format(len(already_tagged))) + with open(cur_tagging, 'r') as file: + reader = csv.reader(file) + next(reader, None) + already_tagged |= {row[0] for row in reader} + logging.info("\nIncreased row count to {} for based on 'in progress' data".format(len(already_tagged))) + all_image_files = list(basedir.rglob(filetype)) + logging.info("\nFound '{}' images of EXACT filetype '{}'".format(len(all_image_files),filetype)) + all_names = [filename.name for filename in all_image_files] + all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] + all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) + for curindex, image in enumerate(all_image_files): + all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) + all_predictions = detector.predict(all_images, batch_size=2, min_confidence=min_confidence) + make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, user_folders) + +if __name__ == "__main__": + import sys + import os + train_dir = str(Path.cwd().parent / "train") + if train_dir not in sys.path: + sys.path.append(train_dir) + from tf_detector import TFDetector + import re + + #Set up logging + console = logging.StreamHandler() + log = logging.getLogger() + log.setLevel(os.environ.get("LOGLEVEL",'DEBUG')) #Set in config + log.addHandler(console) + + # Allow us to import utils + config_dir = str(Path.cwd().parent / "utils") + if config_dir not in sys.path: + sys.path.append(config_dir) + from config import Config + if len(sys.argv)<2: + raise ValueError("Need to specify config file") + config_file = Config.parse_file(sys.argv[1]) + + image_dir = config_file["image_dir"] + untagged_output = config_file["untagged_output"] + 
tagged_output = config_file["tagged_predictions"] + classification_names = config_file["classes"].split(",") + inference_graph_path = str(Path(config_file["inference_output_dir"])/"frozen_inference_graph.pb") + supported_file_type = config_file["filetype"] + + #TODO: Make sure $PYTHONPATH has this in it --> /opt/caffe/python:/opt/caffe2/build: + + #TODO: make sure tagged.csv exists + cur_tagged = config_file["tagged_output"] + + # These are the "tagging in progress" labels. Meaning they will have null labels and class names + # This file needs to exist even if it's empty + cur_tagging = config_file["tagging_output"] # This is a new config key we are adding for training V2 + + logging.info("\n****Initializing TF Detector...****") + cur_detector = TFDetector(classification_names, inference_graph_path) + logging.info("\n****Initializing TF Detector DONE****") + + logging.info("\n****Creating Suggestions****") + get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, filetype=supported_file_type, min_confidence=float(config_file["min_confidence"]), user_folders=config_file["user_folders"]=="True") + logging.info("\n****Creating Suggestions DONE****") \ No newline at end of file diff --git a/train_vnext/create_tf_record.py b/train_vnext/create_tf_record.py new file mode 100644 index 00000000..ef7fabfb --- /dev/null +++ b/train_vnext/create_tf_record.py @@ -0,0 +1,154 @@ +from collections import defaultdict +import tensorflow as tf +import numpy as np +import csv +import hashlib +from pathlib import Path +import re +import sys +import os +import logging + +FOLDER_LOCATION = 8 +HEIGHT_LOCATION = 6 +WIDTH_LOCATION = 7 + +def int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + +def bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + +def float_feature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + +def create_tf_example(predictions, raw_img, tag_map): + filename = predictions[0][0] + height = int(predictions[0][HEIGHT_LOCATION]) + width = int(predictions[0][WIDTH_LOCATION]) + key = hashlib.sha256(raw_img).hexdigest() + xmin = [] + ymin = [] + xmax = [] + ymax = [] + classes = [] + classes_text = [] + truncated = [] + poses = [] + difficult_obj = [] + for prediction in predictions: + if prediction[1]!="NULL": + ymin.append(float(prediction[4])) + xmin.append(float(prediction[2])) + ymax.append(float(prediction[5])) + xmax.append(float(prediction[3])) + tag_name = prediction[1] + classes_text.append(tag_name.encode('utf8')) + classes.append(tag_map[tag_name]) + truncated.append(0) + poses.append("Unspecified".encode('utf8')) + difficult_obj.append(0) + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': int64_feature([height]), + 'image/width': int64_feature([width]), + 'image/filename': bytes_feature([ + filename.encode('utf8')]), + 'image/source_id': bytes_feature([ + filename.encode('utf8')]), + 'image/key/sha256': bytes_feature([key.encode('utf8')]), + 'image/encoded': bytes_feature([raw_img]), + 'image/format': bytes_feature(['jpeg'.encode('utf8')]), + 'image/object/bbox/xmin': float_feature(xmin), + 'image/object/bbox/xmax': float_feature(xmax), + 'image/object/bbox/ymin': float_feature(ymin), + 'image/object/bbox/ymax': float_feature(ymax), + 'image/object/class/text': bytes_feature(classes_text), + 'image/object/class/label': int64_feature(classes), + 'image/object/difficult': 
int64_feature(difficult_obj), + 'image/object/truncated': int64_feature(truncated), + 'image/object/view': bytes_feature(poses), + })) + return example + +def create_tf_record(pred_file, record_file, image_loc, user_folders, split_names=["train","val"], + split_percent=[.7,.3], tag_names = ["stamp"], test_file=None): + + record_file = Path(record_file) + with open(pred_file, 'r') as file: + reader = csv.reader(file) + next(reader, None) + all_preds = list(reader) + + all_files = defaultdict(list) + if test_file is not None: + with open(test_file, 'r') as file: + reader = csv.reader(file) + next(reader, None) + all_test = set((row[0] for row in reader)) + for row in all_preds: + if row[0] not in all_test: + all_files[row[0]].append(row) + else: + for row in all_preds: + all_files[row[0]].append(row) + + rand_list = list(all_files) + np.random.shuffle(rand_list) + split_percent = np.cumsum(split_percent) + split_percent = split_percent[:-1] + split_percent *= len(rand_list) + split_percent = split_percent.round().astype(np.int) + split_preds = np.split(rand_list,split_percent) + + tag_map = {name: index for index, name in enumerate(tag_names, 1)} + + for name, filenames in zip(split_names, split_preds): + tf_record_file_path = "{}_{}".format(record_file.with_suffix(''), name) + record_file.suffix + logging.info("Creating TF record {}".format(tf_record_file_path)) + writer = tf.python_io.TFRecordWriter(tf_record_file_path) + for filename in filenames: + predictions = all_files[filename] + if user_folders: + file_loc = str(Path(image_loc)/predictions[0][FOLDER_LOCATION]/filename) + else: + file_loc = str(Path(image_loc)/filename) + logging.debug("Using image at {}".format(file_loc)) + with open(file_loc, "rb") as img_file: + raw_img = img_file.read() + tf_example = create_tf_example(predictions, raw_img, tag_map) + writer.write(tf_example.SerializeToString()) + + writer.close() + +if __name__ == "__main__": + import sys + import os + # Allow us to import utils + config_dir = str(Path.cwd().parent / "utils") + if config_dir not in sys.path: + sys.path.append(config_dir) + from config import Config + if len(sys.argv)<2: + raise ValueError("Need to specify config file") + config_file = Config.parse_file(sys.argv[1]) + + log = logging.getLogger() + log.setLevel(os.environ.get("LOGLEVEL", "INFO")) # or get the log level from the config file + + tagged_csv = config_file["tagged_output"] + tf_record_output = config_file["tf_record_location"] + all_downloaded_images = config_file["image_dir"] + use_folders = False #config_file["user_folders"] + list_of_class_names = config_file["classes"].split(",") + + if not os.path.isfile(tagged_csv): + print("Annotated labels file doesn't exist. 
Run set up to download all dependent training data") + + test_file_path = None + # if os.path.isfile(config_file["test_output"]): + # print("Test file exists") + # test_file_path = config_file["test_output"] + + create_tf_record(tagged_csv,tf_record_output,all_downloaded_images, + use_folders=="True", tag_names=list_of_class_names, test_file=test_file_path) diff --git a/train_vnext/map_validation.py b/train_vnext/map_validation.py new file mode 100644 index 00000000..d358e402 --- /dev/null +++ b/train_vnext/map_validation.py @@ -0,0 +1,225 @@ +import numpy as np +import cv2 +import csv +from collections import defaultdict +from functools import partial +from pathlib import Path +from pandas._libs.hashtable import unique_label_indices +HEIGHT, WIDTH = 1000, 1000 +FILENAME_LOCATION=0 +FOLDER_LOCATION=8 +CLASS_LOCATION=1 +PREDS_START=2 +PREDS_END=5 +BOX_CONFIDENCE_LOCATION=-2 + +def get_map_for_class(zipped_data_arr, min_ious=np.linspace(.50, 0.95, 10, endpoint=True), + avg_recalls = np.linspace(0.00, 1.00, 101, endpoint=True), nms_iou=.7): + # Used linspace over arange for min_ious/avg_recalls due to issues with endpoints + all_confs = [] + all_correct_preds = [] + num_total_detections = 0 + num_total_gtruths = 0 + for ground_arr, detector_arr in zipped_data_arr: + num_gtruths = len(ground_arr) + if not detector_arr: + num_total_gtruths+=num_gtruths + continue + detector_arr = np.asarray(detector_arr, dtype=np.float64) + # Sort by descending confidence, use mergesort to match COCO evaluation + detector_arr = detector_arr[detector_arr[:,-1].argsort(kind='mergesort')[::-1]] + det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose() + if nms_iou is not None: + # Code for NMS + all_indices_to_keep = [] + cur_indices_to_keep = np.arange(len(detector_arr)) + # Repeat until no detections left below overlap threshold + while cur_indices_to_keep.size>1: + # Add the most confident element + all_indices_to_keep.append(cur_indices_to_keep[0]) + cur_x_min = det_x_min[cur_indices_to_keep] + cur_x_max = det_x_max[cur_indices_to_keep] + cur_y_min = det_y_min[cur_indices_to_keep] + cur_y_max = det_y_max[cur_indices_to_keep] + intersect_widths = (np.minimum(cur_x_max[0], cur_x_max[1:]) - np.maximum(cur_x_min[0], cur_x_min[1:])).clip(min=0) + intersect_heights = (np.minimum(cur_y_max[0], cur_y_max[1:]) - np.maximum(cur_y_min[0], cur_y_min[1:])).clip(min=0) + intersect_areas = intersect_widths*intersect_heights + # Inclusion exclusion principle! 
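+                # i.e. union(A, B) = area(A) + area(B) - intersection(A, B), evaluated between the
+                # most confident remaining box (index 0) and every other remaining box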
+ union_areas = ((cur_x_max[0]-cur_x_min[0])*(cur_y_max[0]-cur_y_min[0]) + (cur_x_max[1:]-cur_x_min[1:])*(cur_y_max[1:]-cur_y_min[1:])) - intersect_areas + # Just in case a ground truth has zero area + cur_ious = np.divide(intersect_areas, union_areas, out=union_areas, where=union_areas!=0) + # Keep appending [0] to a list + # Just say cur_indices = np where cur_ious < nms_iou + cur_indices_to_keep = cur_indices_to_keep[1:] + cur_indices_to_keep = np.intersect1d(cur_indices_to_keep, cur_indices_to_keep[np.nonzero(cur_ious < nms_iou)[0]], assume_unique=True) + if cur_indices_to_keep.size==1: + all_indices_to_keep.append(cur_indices_to_keep[0]) + detector_arr = detector_arr[np.asarray(all_indices_to_keep)] + det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose() + num_detections = len(detector_arr) + if not ground_arr: + num_total_detections+=num_detections + all_confs.append(confs) + continue + ground_arr = np.asarray(ground_arr, dtype=np.float64) + ground_x_min, ground_x_max, ground_y_min, ground_y_max = ground_arr.transpose() + # Clip negative since negative implies no overlap + intersect_widths = (np.minimum(det_x_max[:, np.newaxis], ground_x_max) - np.maximum(det_x_min[:, np.newaxis], ground_x_min)).clip(min=0) + intersect_heights = (np.minimum(det_y_max[:, np.newaxis], ground_y_max) - np.maximum(det_y_min[:, np.newaxis], ground_y_min)).clip(min=0) + intersect_areas = intersect_widths*intersect_heights + # Inclusion exclusion principle! + union_areas = ((det_x_max-det_x_min)*(det_y_max-det_y_min))[:, np.newaxis] + ((ground_x_max-ground_x_min)*(ground_y_max-ground_y_min)) - intersect_areas + # Just in case a ground truth has zero area + iou = np.divide(intersect_areas, union_areas, out=union_areas, where=union_areas!=0) + # Defined best ground truth as one with highest IOU. This is an array of size num_detections, where + # best_gtruths[i] is the index of the ground truth to which prediction i is most similar (highest IOU) + best_gtruths = np.argmax(iou, axis=1) + # valid_preds is a generator where each element is a numpy int array. Each numpy array corresponds to + # a min_iou in the min_ious array, and has indices corresponding to the predictions whose + # prediction-ground truth pairs have IOU greater than that min_iou. + valid_preds = map(np.nonzero, iou[np.arange(num_detections), best_gtruths]>min_ious[:, np.newaxis]) + # + ## Useful for standard precision/recall metrics + # num_true_positives = np.count_nonzero(np.bincount(best_gtruths[valid_preds])) + # num_false_positives = num_detections - detected_gtruths + # num_false_negatives = num_gtruths - detected_gtruths + # + # best_gtruths[valid_preds] uses the previously calculated valid_preds array to return an array + # containing the ground truths indices for each prediction whenever the ground truth-prediction + # IOU was greater than min_iou. 
Then unique_label_indices is used to find the leftmost occuring + # ground truth index for each ground truth index, which corresponds to finding the true positives + # (since we only consider the highest confidence prediction for each ground truth to be a true + # positive, rest are false positives) + # Note that pandas unique_label_indices is equivalent to np.unique(labels, return_index=True)[1] but + # is considerably faster due to using a hashtable instead of sorting + # Once the indices of the true positive predictions are found in the smaller array containing only + # predictions with IOU > min_iou, they are converted back into indices for the original array + # using valid_pred. + correct_preds = [valid_pred[0][unique_label_indices(best_gtruths[valid_pred[0]])]+num_total_detections for valid_pred in valid_preds] + all_correct_preds.append(correct_preds) + all_confs.append(confs) + num_total_detections += num_detections + num_total_gtruths += num_gtruths + # Edge case of no predictions for a class + if not all_confs: + return 0 + # Concatenates all predictions and confidences together to find class MAP + all_confs = np.concatenate(all_confs) + all_correct_preds = [np.concatenate(cur_pred) for cur_pred in zip(*all_correct_preds)] + # Sets only correct prediction indices to true, rest to false. + true_positives = np.zeros((len(min_ious), num_total_detections), dtype=bool) + for iou_index, positive_locs in enumerate(all_correct_preds): + true_positives[iou_index][positive_locs]=True + # Mergesort is chosen to be consistent with coco/matlab results + sort_order = all_confs.argsort(kind='mergesort')[::-1] + true_positives = true_positives[:,sort_order] + # Keeps track of number of true positives until each given point + all_true_positives = np.cumsum(true_positives, axis=1) + # PASCAL VOC 2012 + if avg_recalls is None: + # Zero pad both sides to calculate area under curve + precision = np.zeros((len(min_ious), num_total_detections+2), dtype=np.float64) + # Pad one side with zeros and the other with ones for area under curve + recall = np.zeros((len(min_ious), num_total_detections+2), dtype=np.float64) + recall[:,-1] = np.ones(len(min_ious), dtype=np.float64) + # In python >=3 this is equivalent to np.true_divide + precision[:,1:-1] = all_true_positives / np.arange(1, num_total_detections+1) + # Makes each element in precision list max of all elements to right (ignores endpoints) + precision[:,1:-1] = np.maximum.accumulate(precision[:,-2:0:-1], axis=1)[:,::-1] + recall[:,1:-1] = all_true_positives / num_total_gtruths + # Calculate area under P-R curve for each IOU + # Should only be one IOU at .5 for PASCAL + all_areas = [] + for cur_recall, cur_precision in zip(recall, precision): + # Find indices where value of recall changes + change_points = np.nonzero(cur_recall[1:]!=cur_recall[:-1])[0] + # Calculate sum of dw * dh as area and append to all areas + all_areas.append(np.sum((cur_recall[change_points+1] - cur_recall[change_points]) * cur_precision[change_points+1])) + return np.mean(all_areas) + # PASCAL VOC 2007 + else: + # The extra zero is to deal with a recall larger than is achieved by model + precision = np.zeros((len(min_ious), num_total_detections+1), dtype=np.float64) + # In python >=3 this is equivalent to np.true_divide + precision[:,:-1] = all_true_positives / np.arange(1, num_total_detections+1) + # Makes each element in precision list max of all elements to right (extra zero at right doesn't matter) + precision = np.maximum.accumulate(precision[:,::-1], axis=1)[:,::-1] + 
recall = all_true_positives / num_total_gtruths + # For each recall, finds leftmost index (i.e. largest precision) greater than it + indices_to_average = np.apply_along_axis(np.searchsorted, 1, recall, avg_recalls) + # Finds matching largest prediction for each recall and turns it into an array + precs_to_average = precision[np.arange(len(precision))[:,np.newaxis], indices_to_average] + # Returns average precision over each recall and over each IOU. Can specify an axis + # if separate average precision is wanted for each IOU (e.g. to do more precise statistics) + return np.mean(precs_to_average) + +def detectortest(predictions, ground_truths, output, user_folders): + '''Inputs test_detector that follows the Detector ABC, images which is + a list of image filenames, image_size which is the resized image size + necessary for inputting and ground_truths which is the correct labels + for the images. Optionally takes in min_fscore. + Outputs a boolean based on whether or not the F1-Score + of test_detector was greater than min_fscore''' + # First defaultdict corresponds to class name, inner one corresponds to filename, first list in tuple + # corresponds to ground truths for that class+file and second list corresponds to predictions + all_boxes = defaultdict(lambda: defaultdict(lambda: ([],[]))) + files_in_ground_truth = set() + with open(ground_truths, 'r') as truths_file: + reader = csv.reader(truths_file) + next(reader, None) + if user_folders: + for row in reader: + all_boxes[row[CLASS_LOCATION]][(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][0].append(row[PREDS_START:PREDS_END+1]) + files_in_ground_truth.add((row[FOLDER_LOCATION], row[FILENAME_LOCATION])) + else: + for row in reader: + all_boxes[row[CLASS_LOCATION]][row[FILENAME_LOCATION]][0].append(row[PREDS_START:PREDS_END+1]) + files_in_ground_truth.add(row[FILENAME_LOCATION]) + with open(predictions, 'r') as preds_file: + reader = csv.reader(preds_file) + next(reader, None) + if user_folders: + for row in reader: + if (row[FOLDER_LOCATION], row[FILENAME_LOCATION]) in files_in_ground_truth: + all_boxes[row[CLASS_LOCATION]][(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][1].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1]) + else: + for row in reader: + if row[FILENAME_LOCATION] in files_in_ground_truth: + all_boxes[row[CLASS_LOCATION]][row[FILENAME_LOCATION]][1].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1]) + all_class_maps = {} + for classname, all_file_preds in all_boxes.items(): + class_map = get_map_for_class(all_file_preds.values(), avg_recalls=None, min_ious=np.asarray([.5])) + all_class_maps[classname] = class_map + # Calculates average over all classes. This is the mAP for the test set. 
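+    # e.g. with hypothetical per-class APs {"knot": 0.83, "defect": 0.61}, avg_map = (0.83 + 0.61) / 2 = 0.72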
+ avg_map = sum(all_class_maps.values())/len(all_class_maps) if all_class_maps else 0 + print('Class Name: Average, AP: {}'.format(avg_map)) + print('\n'.join('Class Name: {}, AP: {}'.format(*classdata) for classdata in all_class_maps.items())) + with open(output, 'w') as out_file: + csv_writer=csv.writer(out_file) + csv_writer.writerow(['Class Name','AP']) + csv_writer.writerow(['Average', avg_map]) + for classdata in all_class_maps.items(): + csv_writer.writerow(classdata) +if __name__ == "__main__": + import re + import sys + import os + # Allow us to import utils + config_dir = str(Path.cwd().parent / "utils") + if config_dir not in sys.path: + sys.path.append(config_dir) + from config import Config + if len(sys.argv)<2: + raise ValueError("Need to specify config file") + config_file = Config.parse_file(sys.argv[1]) + + test_file = config_file["test_output"] #Can be null + pre_training_annotations = config_file["tagged_output"] + post_training_annotations = config_file["tagged_predictions"] + validation_output_path = config_file["validation_output"] + + if os.path.isfile(test_file): + detectortest(post_training_annotations, test_file, validation_output_path, config_file["user_folders"]=="True") + else: + # TODO: If we keep track of val/train we can calc prec/f-score for that too + detectortest(post_training_annotations, pre_training_annotations, validation_output_path, config_file["user_folders"]=="True") diff --git a/train_vnext/training.py b/train_vnext/training.py new file mode 100644 index 00000000..20c62776 --- /dev/null +++ b/train_vnext/training.py @@ -0,0 +1,301 @@ +import argparse +import os +import csv +import requests +import datetime +from azure.storage.blob import BlockBlobService, ContentSettings +from utils.blob_utils import BlobStorage +from utils.config import Config +from .validate_config import get_legacy_config, initialize_training_location +import urllib.request +from urlpath import URL +import sys +import time +import jsonpickle +import json +from functions.pipeline.shared.db_access import ImageTagState, PredictionLabel, TrainingSession, Tag + +CONFIG_PATH = os.environ.get('ALCONFIG', None) + +def train(legacy_config, user_name, function_url): + + # First, downloxad data necessary for training + training_data = download_data_for_training(user_name, function_url) + + # Make sure directory is clean: + file_location = initialize_training_location(legacy_config) + + # Grab tagged and totag images from the blob storage + download_images(training_data["imageURLs"], legacy_config.get('image_dir')) + + # create csv file from this data + convert_tagged_labels_to_csv(training_data["taggedLabelData"],legacy_config.get('tagged_output')) + convert_tagging_labels_to_csv(training_data["taggingLabelData"], legacy_config.get('tagging_output')) + + #create label map + create_pascal_label_map(legacy_config.get('label_map_path'),legacy_config.get('classes').split(",")) + + +def download_images(image_urls, folder_location): + folder_location = os.path.expanduser(folder_location) + print("Syncing images to " + folder_location) + + if not os.path.exists(folder_location): + print("Directory doesn't exist so downloading all images may take a few minutes...") + os.makedirs(folder_location) + + existing_files = {os.path.relpath(os.path.join(directory, cur_file), folder_location) for (directory, _, filenames) + in os.walk(folder_location) for cur_file in filenames} + + try: + for image_url in image_urls: + parsed_url = URL(image_url) + file_name = parsed_url.name + if file_name not in 
existing_files: + with urllib.request.urlopen(image_url) as response, open(folder_location + '/' + str(file_name), 'wb') as out_file: + data = response.read() # a `bytes` object + out_file.write(data) + except Exception as e: + print("An error occurred attempting to download image at {0}:\n\n{1}".format(image_url,e)) + raise + print("Synced images into " + folder_location) + + +def download_data_for_training(user_name, function_url): + print("Downloading data for training, this may take a few moments...") + # Download all images to begin training + query = { + "userName": user_name, + "tagStatus": [ int(ImageTagState.READY_TO_TAG), + int(ImageTagState.TAG_IN_PROGRESS), + int(ImageTagState.COMPLETED_TAG), + int(ImageTagState.INCOMPLETE_TAG)] + } + url = function_url + '/api/images' + response = requests.get(url, params=query) + all_images_json = response.json() + image_urls_to_download = [info['location'] for info in all_images_json] + + # Download upto 200 images that have been tagged, for training + query['tagStatus'] = ImageTagState.COMPLETED_TAG + query['imageCount'] = 200 + url = function_url + '/api/labels' + response = requests.get(url, params=query) + tagged_label_data = response.json() + + tagging_image_data = set([get_image_name_from_url(item['location']) for item in all_images_json if item['tagstate'] == ImageTagState.TAG_IN_PROGRESS]) + return { "imageURLs": image_urls_to_download, + "taggedLabelData": tagged_label_data, + "taggingLabelData": tagging_image_data } + +def convert_tagging_labels_to_csv(filenames, tagging_output_file_path): + try: + if not os.path.exists(tagging_output_file_path): + dir_name = os.path.dirname(tagging_output_file_path) + if not os.path.exists(dir_name): + os.makedirs(dir_name) + with open(tagging_output_file_path, 'w') as csvfile: + for img in filenames: + csvfile.write(img + '\n') + print("Created tagging csv file: " + tagging_output_file_path) + except Exception as e: + print("An error occurred attempting to write to file at {0}:\n\n{1}".format(tagging_output_file_path,e)) + raise + +def convert_tagged_labels_to_csv(data, tagged_output_file_path): + try: + if not os.path.exists(tagged_output_file_path): + dir_name = os.path.dirname(tagged_output_file_path) + if not os.path.exists(dir_name): + os.makedirs(dir_name) + with open(tagged_output_file_path, 'w') as csvfile: + filewriter = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL) + filewriter.writerow(['filename','class','xmin','xmax','ymin','ymax','height','width']) + for img in data: + imagelocation = get_image_name_from_url(img["imagelocation"]) + image_height = img["image_height"] + image_width = img["image_width"] + for label in img["labels"]: + # Create new tag and convert it to relative coordinates + tag = Tag(label["classificationname"], float(label['x_min']), float(label['x_max']), float(label['y_min']), float(label['y_max'])) + # Save it in relative coordinates for training scripts + tag.convert_to_relative(int(image_width), int(image_height)) + data = [imagelocation, + tag.classificationname, + tag.x_min, + tag.x_max, + tag.y_min, + tag.y_max, + image_height, + image_width] + filewriter.writerow(data) + print("Created tagged csv file: " + tagged_output_file_path) + except Exception as e: + print("An error occurred attempting to write to file at {0}:\n\n{1}".format(tagged_output_file_path,e)) + raise + +def upload_data_post_training(prediction_output_file, classification_name_to_class_id, training_id, user_name,function_url): + query = { + "userName": user_name, 
+ "trainingId": training_id + } + function_url = function_url + "/api/labels" + payload_json = process_post_training_csv(prediction_output_file, training_id, classification_name_to_class_id) + requests.post(function_url, params=query, json=payload_json) + print("Uploaded prediction labels to db.") + +def process_post_training_csv(csv_path, training_id, classification_name_to_class_id): + payload_json = [] + with open(csv_path) as f: + reader = csv.reader(f) + next(reader, None) #Skip header + for row in reader: + class_name = row[1] + if class_name in classification_name_to_class_id: + # Convert the bounding boxes to be with respect to image dimensions + prediction_label = PredictionLabel(training_id, + int(row[0].split('.')[0]), + classification_name_to_class_id[class_name], + float(row[2]), + float(row[3]), + float(row[4]), + float(row[5]), + int(row[6]), + int(row[7]), + float(row[8]), + float(row[9])) + # Save in the database in absolute terms to have parity + prediction_label.convert_to_absolute() + payload_json.append(prediction_label) + return jsonpickle.encode(payload_json, unpicklable=False) + +def save_training_session(config, model_location, perf_location, prediction_labels_location): + # First, upload the model to blob storage + cur_date_time = "{date:%Y_%m_%d_%H_%M_%S}".format(date=datetime.datetime.utcnow()) + file_name = "frozen_inference_graph_" + cur_date_time + ".pb" + model_location = upload_model_to_blob_storage(config, model_location, file_name, config.get("tagging_user")) + + # Get the mapping for class name to class id + overall_average, classification_name_to_class_id, avg_dictionary = process_classifications(perf_location, config.get("tagging_user"), config.get("url")) + + # Create a new training session in db and get its id + training_id = construct_new_training_session(perf_location, classification_name_to_class_id, overall_average, cur_date_time, model_location, avg_dictionary, config.get("tagging_user"), config.get("url")) + + # Upload prediction labels to the db + upload_data_post_training(prediction_labels_location, classification_name_to_class_id, training_id, config.get("tagging_user"), config.get("url")) + +def upload_model_to_blob_storage(config, model_location, file_name, user_name): + try: + blob_storage = BlobStorage.get_azure_storage_client(config) + blob_metadata = { + "userFilePath": model_location, + "uploadUser": user_name + } + uri = 'https://' + config.get("storage_account") + '.blob.core.windows.net/' + config.get("storage_perm_container") + '/' + file_name + blob_storage.create_blob_from_path( + config.get("storage_perm_container"), + file_name, + model_location, + metadata=blob_metadata + ) + print("Model uploaded at " + str(uri)) + return uri + except Exception as e: + print("Issue uploading model to cloud storage: {}",e) + raise + +def construct_new_training_session(perf_location, classification_name_to_class_id, overall_average, training_description, model_location, avg_dictionary, user_name, function_url): + try: + training_session = TrainingSession(training_description, model_location, overall_average, avg_dictionary) + query = { + "userName": user_name + } + function_url = function_url + "/api/train" + payload = jsonpickle.encode(training_session, unpicklable=False) + response = requests.post(function_url, params=query, json=payload) + response.raise_for_status() + training_id = int(response.json()) + print("Created a new training session with id: " + str(training_id)) + return training_id + except requests.exceptions.HTTPError as e: + 
print("HTTP Error when saving training session: {}",e.response.content) + raise + except Exception as e: + print("Issue saving training session: {}",e) + raise + +def process_classifications(perf_location, user_name,function_url): + try: + # First build query string to get classification map + classes = "" + query = { + "userName": user_name + } + function_url = function_url + "/api/classification" + overall_average = 0.0 + with open(perf_location) as f: + content = csv.reader(f, delimiter=',') + next(content, None) #Skip header + for line in content: + class_name = line[0].strip() + if class_name == "Average": + overall_average = line[1] if line[1].isdigit() else 0 + elif class_name not in classes and class_name != "NULL": + classes = classes + class_name + "," + + query["className"] = classes[:-1] + print("Getting classification map for classes " + query["className"]) + response = requests.get(function_url, params=query) + response.raise_for_status() + classification_name_to_class_id = response.json() + + # Now that we have classification map, build the dictionary that maps class id : average + avg_dictionary = {} + with open(perf_location) as csvfile: + reader = csv.reader(csvfile, delimiter=',') + next(reader, None) #Skip header + for row in reader: + if row[0] != "NULL" and row[0] in classification_name_to_class_id: + avg_dictionary[classification_name_to_class_id[row[0]]] = row[1] + + return overall_average, classification_name_to_class_id, avg_dictionary + except requests.exceptions.HTTPError as e: + print("HTTP Error when getting classification map: {}",e.response.content) + raise + except Exception as e: + print("Issue processing classfication: {}",e) + +def get_image_name_from_url(image_url): + start_idx = image_url.rfind('/')+1 + return image_url[start_idx:] + +def create_pascal_label_map(label_map_path: str, class_names: list): + with open(label_map_path, "w") as map_file: + for index, name in enumerate(class_names, 1): + map_file.write("item {{\n id: {}\n name: '{}'\n}}".format(index, name)) + print("Created Pascal VOC format file: " + label_map_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config-file', required=True) + parser.add_argument( + 'operation', + choices=['start', 'save'] + ) + args = parser.parse_args() + operation = args.operation + legacy_config = get_legacy_config(args.config_file) + config = Config.read_config(CONFIG_PATH) + + if operation == "start": + train(legacy_config, config.get("tagging_user"), config.get("url")) + elif operation == "save": + # The model is saved relative to the python_file_directory in + # ${inference_output_dir}/frozen_inference_graph.pb + path_to_model = os.path.join(legacy_config.get("python_file_directory"), + legacy_config.get("inference_output_dir"), + "frozen_inference_graph.pb") + save_training_session(config, + path_to_model, + legacy_config.get("validation_output"), + legacy_config.get("untagged_output")) \ No newline at end of file diff --git a/train_vnext/validate_config.py b/train_vnext/validate_config.py new file mode 100644 index 00000000..64f655aa --- /dev/null +++ b/train_vnext/validate_config.py @@ -0,0 +1,79 @@ +import os +import sys +from pathlib import Path +import pathlib +import shutil +path = os.path.join(os.path.dirname(__file__), os.pardir) +sys.path.append(path) +from utils.config import Config + +class IllegalArgumentError(ValueError): + pass + +def initialize_training_location(config): + file_tree = pathlib.Path(os.path.expanduser( + 
config.get("train_dir")) + ) + + if file_tree.exists(): + print("Removing existing tag data directory: " + str(file_tree)) + + shutil.rmtree(str(file_tree), ignore_errors=True) + + return pathlib.Path(file_tree) + +def validate_value(config: dict, key_name: str): + return_val = config[key_name] + if not return_val: + raise IllegalArgumentError("Need value for {} in legacy config file".format(key_name)) + return return_val + +def get_legacy_config(config_path: str) -> dict: + legacy_config_file = {} + try: + legacy_config_file = Config.parse_file(config_path) + except Exception as e: + print("An error occurred attempting to read to file at {0}:\n\n{1}".format(config_path,e)) + raise + + if legacy_config_file["user_folders"] == True: + raise IllegalArgumentError("Currently we do not support user folders. Change setting in {}".format(config_path)) + + current_dir_path = os.path.dirname(os.path.abspath(__file__)) + configured_path = legacy_config_file["python_file_directory"] + if os.path.abspath(configured_path) != current_dir_path: + msg = "Expected 'python_file_directory' to be {}".format(current_dir_path) + raise IllegalArgumentError(msg) + + #TODO: Validate that the images we have match the filetype + #TODO: Make sure the classifications exist in the DB + + keys_we_care_about = [ + "data_dir", + "tagged_output", + "tagging_output", + "image_dir", + "train_dir", + "filetype", + "classes", + "validation_output", + "untagged_output", + "inference_output_dir", + "python_file_directory", + "label_map_path" + ] + + result = {} + for key_name in keys_we_care_about: + os.environ[key_name] = validate_value(legacy_config_file,key_name) + result[key_name] = os.path.expandvars(os.environ[key_name]) + + return result + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--file-path', type=str) + args = parser.parse_args() + print(get_legacy_config(args.file_path)) + diff --git a/tutorial/functions/README.md b/tutorial/functions/README.md new file mode 100644 index 00000000..c3dc5127 --- /dev/null +++ b/tutorial/functions/README.md @@ -0,0 +1,7 @@ +## Azure Functions Tutorials + +Much of the Active Learning pipeline relies upon Python. One implementation of the +pipeline makes use of Azure Functions with the Python language. This set of documents +and tutorial show how to setup (and script) a Azure Function environment, inject +variables within the environment within which the functions run, how to access +secrets within Azure Keyvault. diff --git a/tutorial/functions/docs/setup/README.md b/tutorial/functions/docs/setup/README.md new file mode 100644 index 00000000..94138f79 --- /dev/null +++ b/tutorial/functions/docs/setup/README.md @@ -0,0 +1,7 @@ +## Working with Python and Azure Functions + +This project has scripts, hints, and tricks for working with Python and Azure +Functions. 
The project is broken down into multiple areas: + +- [Setting up one's environment](initial/README.md) +- [Using Keyvault for Secrets](security/keyvault/README.md) \ No newline at end of file diff --git a/tutorial/functions/docs/setup/deployment/README.md b/tutorial/functions/docs/setup/deployment/README.md new file mode 100644 index 00000000..fb01fa89 --- /dev/null +++ b/tutorial/functions/docs/setup/deployment/README.md @@ -0,0 +1,53 @@ +## Deploying a Python Azure Function application + +Assuming one has gone through the [instructions to set up the environment](../setup/initial/README.md) +for developing a Python-based Azure Function, the skeleton application created with an `HttpTrigger` +template can be deployed. + +At its simplest, deploying the function uses the Azure Functions CLI tools that were +previously installed. The command is `func azure functionapp publish $AZURE_FUNC_APP_NAME --force`. +The below assumes that one has activated the Python virtual environment. + +```bash +export AZURE_FUNC_APP_NAME=jmsazfunapp1 +$ func azure functionapp publish $AZURE_FUNC_APP_NAME --force +Getting site publishing info... +pip download -r /home/jims/code/src/github/jmspring/azure_python_functions/scratch/azfuncprj/testprj1/requirements.txt --dest /tmp/azureworkerda2e6auj +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 grpcio_tools==1.14.2 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 six==1.11.0 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 azure_functions==1.0.0a4 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 grpcio==1.14.2 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 setuptools==40.4.3 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 protobuf==3.6.1 +pip download --no-deps --only-binary :all: --platform manylinux1_x86_64 --python-version 36 --implementation cp --abi cp36m --dest /tmp/azureworkergtpit9y9 azure_functions_worker==1.0.0a4 + +Preparing archive... +Uploading content... +Upload completed successfully. +Deployment completed successfully. +Removing 'WEBSITE_CONTENTSHARE' from 'jmsazfunapp1' +Removing 'WEBSITE_CONTENTAZUREFILECONNECTIONSTRING' from 'jmsazfunapp1' +Syncing triggers... +``` + +Note that the above uses the `--force` flag because the initial creation of the Azure +Function application configured the application to use Azure Files for publishing +and deployment. Without `--force`, the result would look something like: + +```bash +$ func azure functionapp publish $AZURE_FUNC_APP_NAME +Your app is configured with Azure Files for editing from Azure Portal. +To force publish use --force. This will remove Azure Files from your app. +``` + +This may be specific to the preview, or it may be the default. As things mature, the above directions will +be cleaned up.
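For repeated deployments it can be convenient to wrap the activate-and-publish steps in a small helper. The sketch below is illustrative only (it is not one of the scripts shipped in this folder) and assumes the virtual environment and function app created in the earlier steps:

```bash
#!/bin/bash
# Illustrative publish helper: activate the virtual environment, then publish
# the current project to the named function app.
if [ "$#" -ne "2" ]; then
    echo "Usage: publish_function_app.sh <virtual environment directory> <function app name>"
    exit 1
fi

VIRTENV_DIR=$1
APP_NAME=$2

source "$VIRTENV_DIR/bin/activate"

# --force is needed while the app is still configured to use Azure Files (see above).
func azure functionapp publish "$APP_NAME" --force
if [ "$?" -ne "0" ]; then
    echo "Publishing $APP_NAME failed"
    exit 1
fi
```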
+ +Hitting the default web service, using curl, the result is as follows: + +```bash +$ curl https://jmsazfunapp1.azurewebsites.net/api/httpfunc1?name=foo +Hello foo! +``` + +At this point, you have a Python function deployed to Azure Functions. diff --git a/tutorial/functions/docs/setup/initial/README.md b/tutorial/functions/docs/setup/initial/README.md new file mode 100644 index 00000000..18f0da7f --- /dev/null +++ b/tutorial/functions/docs/setup/initial/README.md @@ -0,0 +1,212 @@ +## Setting up a function environment + +In the following document, we explore using the scripts in this folder to walk through +setting up an environment for and deploying a Python function into Azure Functions. +Python for Azure Functions is in preview at the moment, so things may change. The +following prerequisites are assumed: + +- You have an active Azure account +- The prerequisites for working with Azure Functions have been installed, per [here](https://github.com/Azure/Azure-Functions/wiki/Azure-Functions-on-Linux-Preview) +- Per the prerequisites, the scripts below assume one has created a resource group + +### Python for Azure Functions in practice + +One thing most of the documents around Python for Azure Functions do not call out is +the fact that function development requires a virtual environment to be set up. The +script `setup_environment.sh` does just that. The syntax is: + +```bash +$ ./setup_environment.sh <virtual environment directory> +``` + +For the purposes of this write-up we will use environment variables to illustrate the process. +We will first assume our virtual environment is in `~/python/venv/azfuncprj`, so the +above becomes: + +```bash +$ export VIRTUAL_ENV_DIR=~/python/venv/azfuncprj +$ ./setup_environment.sh $VIRTUAL_ENV_DIR +``` + +Running the above we get: + +```bash +$ export VIRTUAL_ENV_DIR=~/python/venv/azfuncprj +$ ./setup/setup_environment.sh $VIRTUAL_ENV_DIR +Running virtualenv with interpreter /usr/bin/python3.6 +Using base prefix '/usr' +New python executable in /home/jims/python/venv/azfuncprj/bin/python3.6 +Also creating executable in /home/jims/python/venv/azfuncprj/bin/python +Installing setuptools, pip, wheel...done. +To activate the virtualenv run: source python/venv/azfuncprj/bin/activate +``` + +### Creating the function project + +The next step in working with Python for Azure Functions is to create a project and a +function within that project. This is handled by the script `create_function_project.sh`. +For this step you will need to know: + +- The virtual environment path (created above), `VIRTUAL_ENV_DIR` +- The directory where you want the project to live, this will be defined with the + environment variable `PYFUNC_PROJECT_DIR` +- The project name, defined with the environment variable `PYFUNC_PROJECT_NAME` +- The name of the function within the project, `PYFUNC_FUNCTION_NAME` +- The type of function to create/function template.
In this case we will use an + HttpTrigger app, so `PYFUNC_FUNCTION_TYPE=HttpTrigger` + +Invoking the `create_function_project.sh` script: + +```bash +$ export VIRTUAL_ENV_DIR=~/python/venv/azfuncprj +$ export PYFUNC_PROJECT_DIR=~/python/azfuncprj +$ export PYFUNC_PROJECT_NAME=testprj +$ export PYFUNC_FUNCTION_NAME=testhttpfunc +$ export PYFUNC_FUNCTION_TYPE=HttpTrigger +$ ./create_function_project.sh $VIRTUAL_ENV_DIR $PYFUNC_PROJECT_DIR $PYFUNC_PROJECT_NAME $PYFUNC_FUNCTION_NAME $PYFUNC_FUNCTION_TYPE +Installing wheel package +Installing azure-functions==1.0.0a4 package +Installing azure-functions-worker==1.0.0a4 package +Running pip freeze +Writing .gitignore +Writing host.json +Writing local.settings.json +Writing /home/jims/python/azfuncprj/testprj/.vscode/extensions.json +Select a language: Select a template: HttpTrigger +Function name: [HttpTriggerPython] Writing /home/jims/python/azfuncprj/azfuncprj/testprj/httpfunc1/sample.dat +Writing /home/jims/python/azfuncprj/testprj/httpfunc1/__init__.py +Writing /home/jims/python/azfuncprj/testprj/httpfunc1/function.json +The function "testhttpfunc" was created successfully from the "HttpTrigger" template. +Function httpfunc1 is created within project testprj1 +In order to operate with the function: + - Activate the virtual environment + - Change to ~/python/azfuncprj/testprj +``` + +### Create an Azure Function Application to host your Python function + +Per this [document](https://github.com/Azure/azure-functions-python-worker/wiki/Create-your-first-Python-function) +one can run functions locally. If one wants to actually host them in Azure, then one +needs to create an Azure Function application to do so. Outside of the prerequisites mentioned +above, this is the first piece of this setup that actually interacts with Azure itself. + +The script `create_function_app.sh` sets this up. It requires that a `resource group` has been +previously created. For purposes of Python support in Azure Functions, the resource group (during +preview) must be created in one of the following regions: + +- West US +- East US +- West Europe +- East Asia + +The example below will assume the `resource group` will be named `jmsazfunc1rg` and located +in `West US`. The Azure CLI command to do so is: + +```bash +export RESOURCE_GROUP_NAME=jmsazfunc1rg +export RESOURCE_GROUP_LOCATION=westus +$ az group create --name $RESOURCE_GROUP_NAME --location $RESOURCE_GROUP_LOCATION +{ + "id": "/subscriptions/3fee811e-11bf-4b5c-9c62-a2f28b517724/resourceGroups/jmsazfunc1rg", + "location": "westus", + "managedBy": null, + "name": "jmsazfunc1rg", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null +} +``` + +To create the Azure Function application, the following need to be defined: + +- Resource Group --> `export RESOURCE_GROUP_NAME=jmsazfunc1rg` +- Resource Group/Function Location --> `export RESOURCE_GROUP_LOCATION=westus` +- Storage Account Name --> `export STORAGE_ACCOUNT_NAME=jmsazfunc1sa` +- Azure Function Application Name --> `export AZURE_FUNC_APP_NAME=jmsazfunapp1` + +The storage account, `jmsazfunc1sa`, is needed because that is where the Azure Function +state is stored. The name of the application, for this example, is `jmsazfunapp1`. Resource +group name and location were defined previously.
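One easy thing to get wrong at this point is the storage account name: Azure requires 3-24 characters, lowercase letters and digits only, and the name must be globally unique. An optional pre-check before running the script might look like the following (`az storage account check-name` is a standard Azure CLI command; the variable matches the export above):

```bash
# Optional: confirm the storage account name is valid and still available
# before creating any resources.
az storage account check-name --name $STORAGE_ACCOUNT_NAME --query nameAvailable
```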
+ +Executing the script: + +```bash +$ export RESOURCE_GROUP_NAME=jmsazfunc1rg +$ export RESOURCE_GROUP_LOCATION=westus +$ export STORAGE_ACCOUNT_NAME=jmsazfunc1sa +$ export AZURE_FUNC_APP_NAME=jmsazfunapp1 +$ ./create_function_app.sh $RESOURCE_GROUP_NAME $RESOURCE_GROUP_LOCATION $STORAGE_ACCOUNT_NAME $AZURE_FUNC_APP_NAME +{ + "id": "/subscriptions/3fee811e-11bf-4b5c-9c62-a2f28b517724/resourceGroups/jmsazfunc1rg", + "location": "westus", + "managedBy": null, + "name": "jmsazfunc1rg", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null +} +{ + "accessTier": null, + "creationTime": "2018-10-08T18:52:49.001675+00:00", + "customDomain": null, + "enableHttpsTrafficOnly": false, + "encryption": { + "keySource": "Microsoft.Storage", + "keyVaultProperties": null, + "services": { + "blob": { + "enabled": true, + "lastEnabledTime": "2018-10-08T18:52:49.118545+00:00" + }, + "file": { + "enabled": true, + "lastEnabledTime": "2018-10-08T18:52:49.118545+00:00" + }, + "queue": null, + "table": null + } + }, + "id": "/subscriptions/3fee811e-11bf-4b5c-9c62-a2f28b517724/resourceGroups/jmsazfunc1rg/providers/Microsoft.Storage/storageAccounts/jmsazfunc1sa", + "identity": null, + "isHnsEnabled": null, + "kind": "Storage", + "lastGeoFailoverTime": null, + "location": "westus", + "name": "jmsazfunc1sa", + "networkRuleSet": { + "bypass": "AzureServices", + "defaultAction": "Allow", + "ipRules": [], + "virtualNetworkRules": [] + }, + "primaryEndpoints": { + "blob": "https://jmsazfunc1sa.blob.core.windows.net/", + "dfs": null, + "file": "https://jmsazfunc1sa.file.core.windows.net/", + "queue": "https://jmsazfunc1sa.queue.core.windows.net/", + "table": "https://jmsazfunc1sa.table.core.windows.net/", + "web": null + }, + "primaryLocation": "westus", + "provisioningState": "Succeeded", + "resourceGroup": "jmsazfunc1rg", + "secondaryEndpoints": null, + "secondaryLocation": null, + "sku": { + "capabilities": null, + "kind": null, + "locations": null, + "name": "Standard_LRS", + "resourceType": null, + "restrictions": null, + "tier": "Standard" + }, + "statusOfPrimary": "available", + "statusOfSecondary": null, + "tags": {}, + "type": "Microsoft.Storage/storageAccounts" +} +Your Linux, cosumption plan, function app 'jmsazfunapp1' has been successfully created but is not active until content is published usingAzure Portal or the Functions Core Tools. +``` \ No newline at end of file diff --git a/tutorial/functions/docs/setup/initial/create_function_app.sh b/tutorial/functions/docs/setup/initial/create_function_app.sh new file mode 100755 index 00000000..aac67db0 --- /dev/null +++ b/tutorial/functions/docs/setup/initial/create_function_app.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# This script sets up the azure function space that will host the Python +# functions developed. +# +# To set up the function, you need to do the following: +# +# - create a resource group +# - create a storage account +# - create the function app + +# Needed: +# - resource group name +# - location +# - storage account name +# - application name + +# Check inputs +if [ "$#" -ne "4" ]; then + echo "Usage: create_function_app.sh " + exit 1 +fi + +RG_NAME=$1 +LOCATION=$2 +SA_NAME=$3 +APP_NAME=$4 + +# Create the resource group +az group create --name $RG_NAME --location $LOCATION +if [ "$?" -ne 0 ]; then + echo "Unable to create resource group." 
+ exit 1 +fi + +# Create a storage account in the resource group for the functions to store +# their state / metadata +az storage account create --name $SA_NAME --location $LOCATION --resource-group $RG_NAME --sku Standard_LRS +if [ "$?" -ne 0 ]; then + echo "Unable to create storage account." + exit 1 +fi + +# Create the functions app. This is the environment the functions will fu +az functionapp createpreviewapp --resource-group $RG_NAME --consumption-plan-location $LOCATION --name $APP_NAME --storage-account $SA_NAME --runtime python --is-linux \ No newline at end of file diff --git a/tutorial/functions/docs/setup/initial/create_function_project.sh b/tutorial/functions/docs/setup/initial/create_function_project.sh new file mode 100755 index 00000000..8aeadbe0 --- /dev/null +++ b/tutorial/functions/docs/setup/initial/create_function_project.sh @@ -0,0 +1,91 @@ +#!/bin/bash +set -e + +# This script creates an initial function once it ensures that the requirements +# are installed. The requirements are: +# +# - Azure Functions Core Tools 2.x +# - Azure CLI 2.0 or later +# +# Once the requirements are verified, the following steps are performed: +# +# - Invoke the virtual environment +# - Create the function project +# - Create a new function within the project + +# Check usage +if [ "$#" -ne "5" ]; then + echo "Usage: create_function_project.sh " + exit 1 +fi + +# Check Azure Functions Core Tools +which func >& /dev/null +if [ "$?" -ne "0" ]; then + echo "Azure Functions Core Tools required" + exit 1 +fi +func azure functionapp --help >& /dev/null +if [ "$?" -ne "0" ]; then + echo "func exists, does not appear to be part of Azure Functions Core Tools" + exit 1 +fi + +# Check for Azure CLI +which az >& /dev/null +if [ "$?" -ne "0" ]; then + echo "Azure CLI 2.0 or more recent required." + exit 1 +fi +az --version | grep "azure-cli" | grep "(2." >& /dev/null +if [ "$?" -ne "0" ]; then + echo "Require 2.x or newer version of Azure CLI" + exit 1 +fi + +# command line options +VIRTENV_DIR=$1 +PROJECT_DIR=$2 +PROJECT_NAME=$3 +FUNCTION_NAME=$4 +FUNCTION_TEMPLATE=$5 + +# Verify the virtual environment is there and activate it +if [ ! -d "$VIRTENV_DIR" ] || [ ! -e "$VIRTENV_DIR/bin/activate" ]; then + echo "Please setup virtual environment" + exit 1 +fi +source "$VIRTENV_DIR/bin/activate" +if [ "$?" -ne "0" ]; then + echo "Error activating virtual environment" + exit 1 +fi + +# Create the project directory and change to it +mkdir -p $PROJECT_DIR +if [ "$?" -ne "0" ]; then + echo "Unable to create project directory" + exit 1 +fi +cd $PROJECT_DIR + +# Create the function +func init $PROJECT_NAME --worker-runtime python +if [ "$?" -ne 0 ]; then + echo "Error initializing project" + exit 1 +fi +cd $PROJECT_NAME + +# Create the function +func new --name $FUNCTION_NAME --template "$FUNCTION_TEMPLATE" +if [ "$?" 
-ne 0 ]; then + echo "Error creating the new function" + exit 1 +fi + +# Instructions for continuing +echo "Function $FUNCTION_NAME is created within project $PROJECT_NAME" +echo "In order to operate with the function:" +echo " - Activate the virtual environment" +echo " - Change to $PROJECT_DIR/$PROJECT_NAME" diff --git a/tutorial/functions/docs/setup/initial/setup_environment.sh b/tutorial/functions/docs/setup/initial/setup_environment.sh new file mode 100755 index 00000000..7bd19ac3 --- /dev/null +++ b/tutorial/functions/docs/setup/initial/setup_environment.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Azure Python Functions require that a Python virtual environment be setup +# prior to initializing and creating the function. +# +# This script will setup and activate the specified virtual environment. If +# specified, the virtual environment will be activated. +# +# Usage: setup_environment.sh +# +if [ "$#" -ne "1" ]; then + echo "Usage: setup_environment.sh " + exit 1 +fi + +# Is machine configured properly? Requirements: +# - Python 3.6 or later +# - virtualenv installed + +# Determine Python version +which python >& /dev/null +if [ "$?" -eq "0" ]; then + PYTHON_VERSION=`python --version | awk '{print $2}'` + if [ "$PYTHON_VERSION" -lt "3.6" ]; then + PYTHON_VERSION="" + fi +fi +if [ -z "$PYTHON_VERSION" ] || [ "$PYTHON_VERSION" -lt "3.6" ]; then + which python3 >& /dev/null + if [ "$?" -eq "0" ]; then + PYTHON_VERSION=`python3 --version | awk '{print $2}'` + fi +fi +if [ -z "$PYTHON_VERSION" ]; then + echo "Python 3.6 or later must be installed." + exit 1 +elif [ $(echo "${PYTHON_VERSION:0:3}<3.6" | bc) -eq 1 ]; then + echo "Python 3.6 or later must be installed. Version installed is $PYTHON_VERSION" + exit 1 +fi + +# Check if virtualenv is installed +virtualenv --version >& /dev/null +if [ "$?" -ne "0" ]; then + echo "virtualenv must be installed" + exit 1 +fi + +# Create virtualenv +VIRTUALENV_DIR="$1" +virtualenv --python=python${PYTHON_VERSION:0:3} $VIRTUALENV_DIR + +# How to activate the virtualenv +echo "To activate the virtualenv run: source $VIRTUALENV_DIR/bin/activate" \ No newline at end of file diff --git a/tutorial/functions/docs/setup/security/keyvault/README.md b/tutorial/functions/docs/setup/security/keyvault/README.md new file mode 100644 index 00000000..a9f237a3 --- /dev/null +++ b/tutorial/functions/docs/setup/security/keyvault/README.md @@ -0,0 +1,286 @@ +## Using Azure Keyvault to Store and Manage Secrets in Python Azure Functions + +One issue with service lifecycle, including functions, is how to handle secrets. One +solution is to inject the secrets into a particular script at time of deployment. +Another is to use checked in configuration files with variations based upon deployment +type. Yet another, if dealing with Docker based applications (or similar) is to +inject values as environment variables -- this is usually the cleanest option. + +However, with Azure Functions, the environment variables are a hard coded configuration +file which requires management. Another option is to use a service that protects +secrets, like Azure Keyvault. + +In the first release of functions and those not using the Linux Management Plan, MSI +or Managed Service Identity is available. What MSI does is explicitly gives your +application an Identity that can then be used to access additional services. In the +case of Keyvault, one can configure the Keyault to allow granular access to secrets +within the Keyvault, for instace Read-Only permissions on secrets. 
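As a concrete illustration of such a granular policy, the Azure CLI can limit an identity to reading secrets and nothing else. In the sketch below, `$KEYVAULT_NAME` matches the vault name used later in this document, while `$IDENTITY_OBJECT_ID` is a placeholder for whatever identity (MSI or Service Principal) the function ends up using:

```bash
# Read-only access to secrets: the identity gets no key, certificate, or
# storage permissions in the vault, only the ability to read secret values.
az keyvault set-policy --name $KEYVAULT_NAME \
    --object-id $IDENTITY_OBJECT_ID \
    --secret-permissions get list
```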
+ +Since MSI is not currently supported in the version of functions we are currently +using, the functions will create what is referred to as a Service Principal (an +Azure AD entity) that has an ID and Password and we will assign that Serice +Principal access to the Keyault Secrets. +In fact, it is possible to configure access to and enable an Azure Function to use +Azure Keyvault through a service known as MSI or Managed Service Identity. In order +to enable this, one must first: + +- Create an Azure Keyvault +- Give your Azure Function application an identity +- Configure the Azure Function identity to have access to the Azure Keyvault + +Azure Keyvault has granular access policies. For this example we assume the +function just requires read access to secrets. + +Once the Keyvault is setup and configured, the function itself needs to be +configured to know the location of the Keyvault as well as the names of the +secrets to retrieve. This still requires configuring the environment, but it +does not expose the secrets outside of Keyvault. + +The script `setup_keyvault_and_app_permissions.sh` handles the three steps +above. In order to run it, it needs three commandline values: + +- Resource Group Name --> `RESOURCE_GROUP_NAME` +- Keyvault Name --> `KEYVAULT_NAME` +- Application Name --> `AZURE_FUNC_APP_NAME` + +For the example below, the values used for [this](../../setup/initial/README.md) +walkthrough are used, specifically for `RESOURCE_GROUP_NAME` and `AZURE_FUNC_APP_NAME`. +For the Keyvault Name, we will use the value `jmsfunckv` as the name. + +To launch the script: + +```bash +$ export RESOURCE_GROUP_NAME=jmsazfunc1rg +$ export KEYVAULT_NAME=jmsfunckv +$ export AZURE_FUNC_APP_NAME=jmsazfunapp1 +$ ./setup_keyvault_and_app_permissions.sh $RESOURCE_GROUP_NAME $KEYVAULT_NAME $AZURE_FUNC_APP_NAME +{ + "id": "/subscriptions/3fee811e-11bf-4b5c-9c62-a2f28b517724/resourceGroups/jmsazfunc1rg/providers/Microsoft.KeyVault/vaults/jmsfunckv1", + "location": "westus", + "name": "jmsfunckv1", + "properties": { + "accessPolicies": [ + { + "applicationId": null, + "objectId": "86a607ff-039e-497e-bab1-92247bc5ed02", + "permissions": { + "certificates": [ + "get", + "list", + "delete", + "create", + "import", + "update", + "managecontacts", + "getissuers", + "listissuers", + "setissuers", + "deleteissuers", + "manageissuers", + "recover" + ], + "keys": [ + "get", + "create", + "delete", + "list", + "update", + "import", + "backup", + "restore", + "recover" + ], + "secrets": [ + "get", + "list", + "set", + "delete", + "backup", + "restore", + "recover" + ], + "storage": [ + "get", + "list", + "delete", + "set", + "update", + "regeneratekey", + "setsas", + "listsas", + "getsas", + "deletesas" + ] + }, + "tenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47" + } + ], + "createMode": null, + "enablePurgeProtection": null, + "enableSoftDelete": null, + "enabledForDeployment": false, + "enabledForDiskEncryption": null, + "enabledForTemplateDeployment": null, + "networkAcls": null, + "provisioningState": "Succeeded", + "sku": { + "name": "standard" + }, + "tenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47", + "vaultUri": "https://jmsfunckv1.vault.azure.net/" + }, + "resourceGroup": "jmsazfunc1rg", + "tags": {}, + "type": "Microsoft.KeyVault/vaults" +} +Retrying role assignment creation: 1/36 +{ + "id": "/subscriptions/aaaaaaaa-11bf-4b5c-9c62-dddddddddddd/resourceGroups/jmsazfunc1rg/providers/Microsoft.KeyVault/vaults/jmsfunckv1", + "location": "westus", + "name": "jmsfunckv1", + "properties": { + 
"accessPolicies": [ + { + "applicationId": null, + "objectId": "86a607ff-039e-497e-bab1-92247bc5ed02", + "permissions": { + "certificates": [ + "get", + "list", + "delete", + "create", + "import", + "update", + "managecontacts", + "getissuers", + "listissuers", + "setissuers", + "deleteissuers", + "manageissuers", + "recover" + ], + "keys": [ + "get", + "create", + "delete", + "list", + "update", + "import", + "backup", + "restore", + "recover" + ], + "secrets": [ + "get", + "list", + "set", + "delete", + "backup", + "restore", + "recover" + ], + "storage": [ + "get", + "list", + "delete", + "set", + "update", + "regeneratekey", + "setsas", + "listsas", + "getsas", + "deletesas" + ] + }, + "tenantId": "aaaaaaaa-86f1-41af-cccc-dddddddddddd" + }, + { + "applicationId": null, + "objectId": "ad828a3b-5d98-41cf-81e0-f1530798513e", + "permissions": { + "certificates": null, + "keys": null, + "secrets": [ + "get" + ], + "storage": null + }, + "tenantId": "aaaaaaaa-86f1-41af-cccc-dddddddddddd" + } + ], + "createMode": null, + "enablePurgeProtection": null, + "enableSoftDelete": null, + "enabledForDeployment": false, + "enabledForDiskEncryption": null, + "enabledForTemplateDeployment": null, + "networkAcls": null, + "provisioningState": "Succeeded", + "sku": { + "name": "standard" + }, + "tenantId": "aaaaaaaa-86f1-41af-cccc-dddddddddddd", + "vaultUri": "https://jmsfunckv1.vault.azure.net/" + }, + "resourceGroup": "jmsazfunc1rg", + "tags": {}, + "type": "Microsoft.KeyVault/vaults" +} +[ + { + "name": "FUNCTIONS_WORKER_RUNTIME", + "slotSetting": false, + "value": "python" + }, + { + "name": "FUNCTIONS_EXTENSION_VERSION", + "slotSetting": false, + "value": "~2" + }, + { + "name": "AzureWebJobsStorage", + "slotSetting": false, + "value": "DefaultEndpointsProtocol=https;EndpointSuffix=core.windows.net;AccountName=jmsazfunc1sa;AccountKey=Foo==" + }, + { + "name": "AzureWebJobsDashboard", + "slotSetting": false, + "value": "DefaultEndpointsProtocol=https;EndpointSuffix=core.windows.net;AccountName=jmsazfunc1sa;AccountKey=Foo==" + }, + { + "name": "WEBSITE_RUN_FROM_ZIP", + "slotSetting": false, + "value": "https://jmsazfunc1sa.blob.core.windows.net/function-releases/20181008222714-7ab1234-b18b-4466-ad1a-775522548388.zip?sv=2018-03-28&sr=b&sig=Wfoo&st=2018-10-08T22%3A22%3A30Z&se=2028-10-08T22%3A27%3A30Z&sp=r" + }, + { + "name": "JMSAZFUNAPP1_ID", + "slotSetting": false, + "value": "aaaaaaaa-ea2d-4d11-b2c8-bbbbbbbbbbbb" + }, + { + "name": "JMSAZFUNAPP1_PASSWORD", + "slotSetting": false, + "value": "aaaaaaaa-483f-4bb3-926b-bbbbbbbbbbbb" + }, + { + "name": "JMSAZFUNAPP1_TENANT_ID", + "slotSetting": false, + "value": "aaaaaaaa-86f1-41af-cccc-dddddddddddd" + }, + { + "name": "APPLICATION_PREFIX", + "slotSetting": false, + "value": "JMSAZFUNAPP1" + } +] +``` + +Notice that three entried for accessing the Keyvault hae been entered into application settings +above. The Azure Function application name is `jmsazfunapp1`, therefore the entries for the settings +were generated as _[ID, PASSWORD, TENANT]. + +The application itself is stored as `APPLICATION_PREFIX` in the settings. This value can then be +used within the Python function itself to pull in the other values. + +As noted previously, when Python for Azure Functions supports MSI, this step will not be necessary +as MSI will handle access control internally. 
\ No newline at end of file diff --git a/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions.sh b/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions.sh new file mode 100755 index 00000000..2b760712 --- /dev/null +++ b/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# This script creates a keyvault to be used with an Azure Function to store +# storage credentials. The script: +# +# - creates a keyvault +# - assigns and retrieves the identity of the specified function app +# - adds that identity to the permissions for they keyvault +# +# Usage: setup_keyvault_and_app_permissions.sh + +# Verify the command line args: +if [ "$#" -ne "3" ]; then + echo "Usage: setup_keyvault_and_app_permissions.sh " + exit 1 +fi + +RG_NAME=$1 +KV_NAME=$2 +APP_NAME=$3 + +# Create the key vault +az keyvault create --name $KV_NAME --resource-group $RG_NAME +if [ "$?" -ne "0" ]; then + echo "Unable to create key vault" + exit 1 +fi + +# Assign and retrieve the application identity +APP_ID = az functionapp identity assign -g $RG_NAME -n $APP_NAME +if [ "$?" -ne "0" ]; then + echo "Unable to assign identity to application" + exit 1 +fi + +# Give the application read permissions for secrets in the keyvault +az keyvault set-policy --name $KV_NAME --object-id $APP_ID --secret-permissions get +if [ "$?" -ne "0" ]; then + echo "Unable to give application permissions in keyvault" + exit 1 +fi \ No newline at end of file diff --git a/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions_using_sp.sh b/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions_using_sp.sh new file mode 100755 index 00000000..896a0e02 --- /dev/null +++ b/tutorial/functions/docs/setup/security/keyvault/setup_keyvault_and_app_permissions_using_sp.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# This script creates a keyvault to be used with an Azure Function to store +# storage credentials. It also creates a Service Principal for the Azure +# Function and assigns read only permissions to secrets within the Keyvault. +# The script: +# +# - creates a keyvault +# - creates a service principal for the application +# - assigned permissions to the service principal into within the keyvault +# - stores the service principal ID and Password into the Azure Function Application settings +# +# Usage: setup_keyvault_and_app_permissions_with_sp.sh + +# Verify the command line args: +if [ "$#" -ne "3" ]; then + echo "Usage: setup_keyvault_and_app_permissions.sh " + exit 1 +fi + +RG_NAME=$1 +KV_NAME=$2 +APP_NAME=$3 + +# Create the key vault +az keyvault create --name $KV_NAME --resource-group $RG_NAME +if [ "$?" -ne "0" ]; then + echo "Unable to create key vault" + exit 1 +fi + +# Create a service principal for the application +APP_SP_INFO=`az ad sp create-for-rbac --name https://$APP_NAME.azurewebsites.net` +APP_SP_ID=`echo $APP_SP_INFO | jq -r .appId` +APP_SP_PASSWORD=`echo $APP_SP_INFO | jq -r .password` +APP_TENANT_ID=`echo $APP_SP_INFO | jq -r .tenant` + +# Give the service principal read permissions for secrets in the keyvault +az keyvault set-policy --name $KV_NAME --spn $APP_SP_ID --secret-permissions get +if [ "$?" 
-ne "0" ]; then + echo "Unable to give application permissions in keyvault" + exit 1 +fi + +# Store the values in settings +APPNAME_UPPER=`echo $APP_NAME | awk '{print toupper($0)}'` +APP_SETTING_ID_KEY=$APPNAME_UPPER"_ID" +APP_SETTING_PASSWORD_KEY=$APPNAME_UPPER"_PASSWORD" +APP_SETTING_TENANT_ID_KEY=$APPNAME_UPPER"_TENANT_ID" +az functionapp config appsettings set --name $APP_NAME --resource-group $RG_NAME --settings $APP_SETTING_ID_KEY=$APP_SP_ID $APP_SETTING_PASSWORD_KEY=$APP_SP_PASSWORD $APP_SETTING_TENANT_ID_KEY=$APP_TENANT_ID \ No newline at end of file diff --git a/tutorial/functions/examples/keyvault/service_principal/README.md b/tutorial/functions/examples/keyvault/service_principal/README.md new file mode 100644 index 00000000..3d46ece7 --- /dev/null +++ b/tutorial/functions/examples/keyvault/service_principal/README.md @@ -0,0 +1,5 @@ +## Interacting with Keyvault using Service Principal + +The included Python script shows an example of using a service principal exposed in environment variables +and using that service principal to access entries within the KeyVault. The code can easily be used in +a Python Azure Function where the config information is injected into the settings of the function application. diff --git a/tutorial/functions/examples/keyvault/service_principal/requirements.txt b/tutorial/functions/examples/keyvault/service_principal/requirements.txt new file mode 100644 index 00000000..789a3fc6 --- /dev/null +++ b/tutorial/functions/examples/keyvault/service_principal/requirements.txt @@ -0,0 +1,2 @@ +azure-keyvault>=1.1.0 +azure-common>=1.1.16 \ No newline at end of file diff --git a/tutorial/functions/examples/keyvault/service_principal/retrieve_secret.py b/tutorial/functions/examples/keyvault/service_principal/retrieve_secret.py new file mode 100644 index 00000000..affdf374 --- /dev/null +++ b/tutorial/functions/examples/keyvault/service_principal/retrieve_secret.py @@ -0,0 +1,60 @@ +import logging +import os + +import azure.functions as func +from azure.keyvault import KeyVaultClient, KeyVaultAuthentication +from azure.common.credentials import ServicePrincipalCredentials + +def get_config_from_environment(): + config = {} + config_base = os.environ.get('APPLICATION_PREFIX', None) + if config_base is None: + logging.error('APPLICATION_PREFIX must be set') + return None + config['client_id'] = os.environ.get(config_base + '_ID', None) + config['client_secret'] = os.environ.get(config_base + '_PASSWORD', None) + config['tenant_id'] = os.environ.get(config_base + '_TENANT_ID', None) + if config['client_id'] is None or config['client_secret'] is None or config['tenant_id'] is None: + logging.error(config_base + '_ID, ' + config_base + '_PASSWORD, ' + config_base + '_TENANT_ID must all be set.') + return None + config['keyault_uri'] = os.environ.get('KEYVAULT_URI', None) + if config['keyault_uri'] is None: + logging.error('KEYVAULT_URI must be set') + return None + config['storage_account_kv_base'] = os.environ.get('STORAGE_ACCOUNT_KV_BASE', None) + if config['storage_account_kv_base'] is None: + logging.error('STORAGE_ACCOUNT_KV_BASE must be set') + return None + return config + +def retrieve_keyvault_client(config): + # create the service principle credentials used to authenticate the client + credentials = ServicePrincipalCredentials(client_id=config['client_id'], + secret=config['client_secret'], + tenant=config['tenant_id']) + # create the client using the created credentials + client = KeyVaultClient(credentials) + return client + +def 
retrieve_storage_account_info(config): + client = retrieve_keyvault_client(config) + vaultUri = config['keyault_uri'] + saNameKey = config['storage_account_kv_base'] + '_name' + saValueKey = config['storage_account_kv_base'] + '_name' + logging.error('Vault ' + vaultUri + ', name key: ' + saNameKey + ', value key: ' + saValueKey) + sa_name_bundle = client.get_secret(config['keyault_uri'], config['storage_account_kv_base'] + '_name', secret_version="") + sa_key_bundle = client.get_secret(config['keyault_uri'], config['storage_account_kv_base'] + '_key', secret_version="") + return sa_name_bundle, sa_key_bundle + +def main(req: func.HttpRequest) -> func.HttpResponse: + logging.info('Retrieve secret from Azure Keyvault.') + + config = get_config_from_environment() + if config is None: + return func.HttpResponse( + "Configuration incorrect", + status_code=503 + ) + + name, key = retrieve_storage_account_info(config) + return func.HttpResponse(f"Got {name}, {key}") \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/utils/blob_utils.py b/utils/blob_utils.py index e69de29b..3eabf131 100644 --- a/utils/blob_utils.py +++ b/utils/blob_utils.py @@ -0,0 +1,17 @@ +from azure.storage.blob import BlockBlobService, ContentSettings + +class BlobStorage(): + azure_storage_client = None + + #TODO: Verify the storage account is correct. Currently we get an unhelpful error message if you have a type in Storage Name + @staticmethod + def get_azure_storage_client(config): + if BlobStorage.azure_storage_client is not None: + return BlobStorage.azure_storage_client + + BlobStorage.azure_storage_client = BlockBlobService( + config.get("storage_account"), + account_key=config.get("storage_key") + ) + + return BlobStorage.azure_storage_client \ No newline at end of file diff --git a/utils/config.py b/utils/config.py index 0301b335..6eed72fd 100644 --- a/utils/config.py +++ b/utils/config.py @@ -1,11 +1,110 @@ +import configparser +import os + +FUNCTIONS_SECTION = 'FUNCTIONS' +FUNCTIONS_KEY = 'FUNCTIONS_KEY' +FUNCTIONS_URL = 'FUNCTIONS_URL' + +STORAGE_SECTION = 'STORAGE' +STORAGE_KEY = 'STORAGE_KEY' +STORAGE_ACCOUNT = 'STORAGE_ACCOUNT' +STORAGE_TEMP_CONTAINER = 'STORAGE_TEMP_CONTAINER' +STORAGE_PERM_CONTAINER = 'STORAGE_PERM_CONTAINER' + +TAGGING_SECTION = 'TAGGING' +TAGGING_LOCATION_KEY = 'TAGGING_LOCATION' +TAGGING_USER_KEY = 'TAGGING_USER' + class Config(): @staticmethod def parse_file(file_name): config = {} - for line in open(file_name): - line = line.strip() - if line and line[0] is not "#": - var,value = line.split('=', 1) - config[var.strip()] = value.strip() + with open(file_name) as file_: + for line in file_: + line = line.strip() + if line and line[0] is not "#": + var,value = line.split('=', 1) + config[var.strip()] = value.strip() + return config + + @staticmethod + def storage_config_section(storage_config_section): + storage_account_value = storage_config_section.get(STORAGE_ACCOUNT) + storage_key_value = storage_config_section.get(STORAGE_KEY) + storage_temp_container_value = storage_config_section.get(STORAGE_TEMP_CONTAINER) + storage_perm_container_value = storage_config_section.get(STORAGE_PERM_CONTAINER) + + if not storage_account_value or not storage_key_value or not storage_temp_container_value or not storage_perm_container_value: + raise MissingConfigException() + + return storage_account_value, storage_key_value, storage_temp_container_value, storage_perm_container_value + + @staticmethod + def 
tagging_config_section(tagging_config_section): + tagging_location_value = tagging_config_section.get(TAGGING_LOCATION_KEY) + tagging_user_value = tagging_config_section.get(TAGGING_USER_KEY) + + if not tagging_location_value or not tagging_user_value: + raise MissingConfigException() + + return tagging_location_value, tagging_user_value + + @staticmethod + def functions_config_section(functions_config_section): + functions_key_value = functions_config_section.get(FUNCTIONS_KEY) + functions_url_value = functions_config_section.get(FUNCTIONS_URL) + + if not functions_key_value or not functions_url_value: + raise MissingConfigException() + + return functions_key_value, functions_url_value + + @staticmethod + def read_config_with_parsed_config(parser): + sections = parser.sections() + + if FUNCTIONS_SECTION not in sections: + raise MissingConfigException() + + if STORAGE_SECTION not in sections: + raise MissingConfigException() + + if TAGGING_SECTION not in sections: + raise MissingConfigException() + + functions_key, functions_url = Config.functions_config_section( + parser[FUNCTIONS_SECTION] + ) + + storage_account, storage_key, storage_temp_container, storage_perm_container = Config.storage_config_section( + parser[STORAGE_SECTION] + ) + + tagging_location, tagging_user = Config.tagging_config_section(parser[TAGGING_SECTION]) + + return { + "key": functions_key, + "url": functions_url, + "storage_account": storage_account, + "storage_key": storage_key, + "storage_temp_container": storage_temp_container, + "storage_perm_container": storage_perm_container, + "tagging_location": tagging_location, + "tagging_user": tagging_user + } + + @staticmethod + def read_config(config_path): + if config_path is None: + raise MissingConfigException() + + parser = configparser.ConfigParser() + parser._interpolation = configparser.ExtendedInterpolation() + parser.read(config_path) + return Config.read_config_with_parsed_config(parser) + + +class MissingConfigException(Exception): + pass \ No newline at end of file diff --git a/utils/convert_to_jpeg.py b/utils/convert_to_jpeg.py deleted file mode 100644 index eb18274b..00000000 --- a/utils/convert_to_jpeg.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -from PIL import Image - -curpath = os.getcwd() -for root, dirs, files in os.walk(curpath, topdown=False): - for name in files: - if os.path.splitext(os.path.join(root, name))[1].lower() == ".tif": - reldir = os.path.relpath(root, curpath) - if os.path.isfile(os.path.splitext(os.path.join(curpath, "JPEG", reldir, name))[0] + ".jpg"): - print("A jpeg file already exists for %s" % name) - else: - outfile = os.path.splitext(os.path.join(curpath, "JPEG", reldir, name))[0] + ".jpg" - if not os.path.exists(os.path.join(curpath, "JPEG", reldir)): - os.makedirs(os.path.join(curpath, "JPEG", reldir)) - try: - im = Image.open(os.path.join(root, name)) - print("Generating jpeg for %s" % name) - im.thumbnail(im.size) - im.save(outfile, "JPEG", quality=95) - except: - print("Error") - else: - print(os.path.join(root, name) + " is not a tiff file.") \ No newline at end of file diff --git a/utils/decode_tf_record.py b/utils/decode_tf_record.py deleted file mode 100644 index fe4c0eaa..00000000 --- a/utils/decode_tf_record.py +++ /dev/null @@ -1,37 +0,0 @@ -import numpy as np -import tensorflow as tf -from pathlib import Path -import cv2 -import csv - -def decode_record(record_file, output_folder): - output_folder = Path(output_folder) - output_folder.mkdir(parents=True, exist_ok=True) - output_file = output_folder/"output.csv" - 
record_iterator = tf.python_io.tf_record_iterator(record_file) - for string_record in record_iterator: - example = tf.train.Example() - example.ParseFromString(string_record) - filename = example.features.feature['image/filename'].bytes_list.value[0].decode("utf-8") - height = int(example.features.feature['image/height'].int64_list.value[0]) - width = int(example.features.feature['image/width'].int64_list.value[0]) - xmins = example.features.feature['image/object/bbox/xmin'].float_list.value - ymins = example.features.feature['image/object/bbox/ymin'].float_list.value - xmaxs = example.features.feature['image/object/bbox/xmax'].float_list.value - ymaxs = example.features.feature['image/object/bbox/ymax'].float_list.value - classes = example.features.feature['image/object/class/text'].bytes_list.value - img_raw = (example.features.feature['image/encoded'].bytes_list.value[0]) - img_raw = np.fromstring(img_raw, dtype=np.uint8) - cv2_image = cv2.imdecode(img_raw, cv2.IMREAD_COLOR) - cv2.imwrite(str(output_folder/(filename+".JPG")),cv2_image) - with output_file.open('a') as out_csv: - tagwriter = csv.writer(out_csv) - for xmin, ymin, xmax, ymax, class_raw in zip(xmins, ymins, xmaxs, ymaxs, classes): - tagwriter.writerow([filename,class_raw.decode("utf-8"),float(xmin),float(xmax),float(ymin),float(ymax),height,width]) -if __name__ == "__main__": - import sys - if len(sys.argv)<3: - raise ValueError("Need to specify input file and output folder") - input_file = sys.argv[1] - output_folder = sys.argv[2] - decode_record(input_file, output_folder) diff --git a/utils/mock_response.json b/utils/mock_response.json new file mode 100644 index 00000000..dcc0d669 --- /dev/null +++ b/utils/mock_response.json @@ -0,0 +1,24 @@ +[{"image_height": 483, + "image_width": 724, + "image_id": 199, + "imagelocation": "https://mock-storage.blob.core.windows.net/perm-images/199.JPG", + "labels": { + "classification_names": "knot", + "image_id": 199, + "x_min": 3.0, + "x_max": 9.0, + "y_min": 4.0, + "y_max": 7.0 + }}, + {"image_height": 483, + "image_width": 724, + "image_id": 211, + "imagelocation": "https://mock-storage.blob.core.windows.net/perm-images/211.JPG", + "labels": { + "classification_names": "defect", + "image_id": 211, + "x_min": 3.0, + "x_max": 9.0, + "y_min": 4.0, + "y_max": 7.0 + }}] \ No newline at end of file diff --git a/utils/repartition_test_set.sh b/utils/repartition_test_set.sh deleted file mode 100644 index 6a1e17e3..00000000 --- a/utils/repartition_test_set.sh +++ /dev/null @@ -1,19 +0,0 @@ -# Separate test set -filearr=($(shuf -e $(find "$image_dir" -type f -name $filetype))) -test_num=$(echo "scale=0;${#filearr[@]}*${test_percentage}/1" | bc) - -mkdir -p ${image_dir}/test -mkdir -p ${image_dir}/train -# Separate test set -filearr=($(shuf -e $(ls -pL $image_dir | grep -v /))) -split=$(echo "scale=0;${#filearr[@]}*${train_percentage}/1" | bc) -for i in "${!filearr[@]}"; do - if (("$i" < "$split")); then - mv ${image_dir}/${filearr[$i]} ${image_dir}/train/${filearr[$i]} - else - mv ${image_dir}/${filearr[$i]} ${image_dir}/test/${filearr[$i]} - fi -done - -printf "%s\n" "${filearr[@]:0:$test_num}" > ${image_dir}/test.txt -az storage blob upload --container-name activelearninglabels --file ${image_dir}/test.txt --name test_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY \ No newline at end of file diff --git a/utils/test_vott_parser.py b/utils/test_vott_parser.py new file mode 100644 index 00000000..80cd203e --- /dev/null +++ b/utils/test_vott_parser.py @@ 
-0,0 +1,25 @@ +import unittest +import json +import pathlib +import os +from unittest.mock import Mock +from .vott_parser import process_vott_json, create_starting_vott_json, build_id_to_VottImageTag, create_vott_json_from_image_labels +from functions.pipeline.shared.db_access import ImageLabel + +class TestVOTTParser(unittest.TestCase): + def test_create_vott_json(self): + dirname, _ = os.path.split(os.path.abspath(__file__)) + with open(dirname + '/mock_response.json') as f: + data = json.load(f) + image_labels = [ImageLabel.fromJson(item) for item in data] + existing_classification_list = ['road', 'superdefect', 'test', 'water', 'superknot', 'knot', 'car', 'cloud', 'mountain', 'defect'] + vott_json, image_urls = create_vott_json_from_image_labels(image_labels, existing_classification_list) + self.assertEqual(len(image_urls), 2) + self.assertIsNotNone(vott_json["frames"]["199.JPG"]) + self.assertTrue("inputTags" in vott_json) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/utils/vott_parser.py b/utils/vott_parser.py new file mode 100644 index 00000000..77687786 --- /dev/null +++ b/utils/vott_parser.py @@ -0,0 +1,265 @@ +import json +from functions.pipeline.shared.db_access import ImageTag +import string +import random + +# Vott tags have image height & width data as well. +class VottImageTag(ImageTag): + def __init__(self, image_id, x_min, x_max, y_min, y_max, classification_names, image_height, image_width, image_location): + super().__init__(image_id, x_min, x_max, y_min, y_max, classification_names) + self.image_height = image_height + self.image_width = image_width + self.image_location = image_location + +def __build_tag_from_VottImageTag(image_tag): + return { + "x1": image_tag.x_min, + "x2": image_tag.x_max, + "y1": image_tag.y_min, + "y2": image_tag.y_max, + "width": image_tag.image_width, + "height": image_tag.image_height, + "tags": [image_tag.classification_names], + "UID": __generate_uid(), + "box": { + "x1": image_tag.x_min, + "x2": image_tag.x_max, + "y1": image_tag.y_min, + "y2": image_tag.y_max, + }, + "type": "Rectangle", + "id": image_tag.image_id, + "name": 2 + } + + +def build_id_to_VottImageTag(row): + tag_id_to_VottImageTag = {} + try : + tag_id = row[0] + if tag_id in tag_id_to_VottImageTag: + tag_id_to_VottImageTag[tag_id].classification_names.append(row[6].strip()) + elif row[4] and row[5] and row[6] and row[7]: + tag_id_to_VottImageTag[tag_id] = VottImageTag(row[0], float(row[4]), float(row[5]), + float(row[6]), float(row[7]), [row[3].strip()], + row[8], row[9], row[1]) + except Exception as e: + print("An error occurred building VottImageTag dict: {0}".format(e)) + raise + return tag_id_to_VottImageTag + +def __build_tag_list_from_VottImageTags(image_tag_list): + tag_list = [] + for image_tag in image_tag_list: + if image_tag: + tag_list.append(__build_tag_from_VottImageTag(image_tag)) + return tag_list + + +def __build_frames_data(image_id_to_urls, image_id_to_image_tags): + frames = {} + for image_id in image_id_to_image_tags.keys(): + image_file_name = __get_filename_from_fullpath(image_id_to_urls[image_id]) + image_tags = __build_tag_list_from_VottImageTags(image_id_to_image_tags[image_id]) + frames[image_file_name] = image_tags + return frames + + +def create_vott_json_from_image_labels(image_labels, existing_classifications_list): + frames = {} + image_urls = [] + + for label in image_labels: + image_file_name = __get_filename_from_fullpath(label.imagelocation) + image_urls.append(label.imagelocation) + image_tags = [] + for tag in 
label.labels: + if tag and tag.x_min and tag.x_max and tag.y_min and tag.y_max: + vott_image_tag = VottImageTag(label.image_id, tag.x_min, tag.x_max, tag.y_min, tag.y_max, tag.classification_names, label.image_height, label.image_width, label.imagelocation) + image_tags.append(__build_tag_from_VottImageTag(vott_image_tag)) + + frames[image_file_name] = image_tags + + # "inputTags" + class_length = len(existing_classifications_list) + classification_str = "" + for i in range(class_length): + classification_str += existing_classifications_list[i] + if i != class_length-1: classification_str+="," + + return { + "frames": frames, + "inputTags": classification_str, + "scd": False # Required for VoTT and image processing? unknown if it's also used for video. + }, image_urls + + +# For download function +def create_starting_vott_json(image_id_to_urls, image_id_to_image_tags, existing_classifications_list): + # "frames" + frame_to_tag_list_map = __build_frames_data(image_id_to_urls, image_id_to_image_tags) + + # "inputTags" + class_length = len(existing_classifications_list) + classification_str = "" + for i in range(class_length): + classification_str += existing_classifications_list[i] + if i != class_length-1: classification_str+="," + + return { + "frames": frame_to_tag_list_map, + "inputTags": classification_str, + "scd": False # Required for VoTT and image processing? unknown if it's also used for video. + } + + +def __get_filename_from_fullpath(filename): + path_components = filename.split('/') + return path_components[-1] + + +def __get_id_from_fullpath(fullpath): + return int(__get_filename_from_fullpath(fullpath).split('.')[0]) + + +# Returns a list of processed tags for a single frame +def __create_tag_data_list(json_tag_list): + processed_tags = [] + for json_tag in json_tag_list: + processed_tags.append(__process_json_tag(json_tag)) + return processed_tags + + +def __generate_uid(size=8, chars=string.ascii_lowercase + string.digits): + return ''.join(random.choice(chars) for _ in range(size)) + +def __process_json_tag(json_tag): + return { + "x1": json_tag['x1'], + "x2": json_tag['x2'], + "y1": json_tag['y1'], + "y2": json_tag['y2'], + "UID": json_tag["UID"], + "id": json_tag["id"], + "type": json_tag["type"], + "classes": json_tag["tags"], + "name": json_tag["name"] + } + + +# For upload function +def process_vott_json(json): + all_frame_data = json['frames'] + + # Scrub filename keys to only have integer Id, drop path and file extensions. 
+ id_to_tags_dict = {} + for full_path_key in sorted(all_frame_data.keys()): + # Map ID to list of processed tag data + id_to_tags_dict[__get_id_from_fullpath(full_path_key)] = __create_tag_data_list(all_frame_data[full_path_key]) + all_ids = list(id_to_tags_dict.keys()) + + # Remove images with no tags from dict + for id in all_ids: + if not id_to_tags_dict[id]: + del(id_to_tags_dict[id]) + + # Do the same with visitedFrames + visited_ids = sorted(json['visitedFrames']) + for index, filename in enumerate(visited_ids): + visited_ids[index] = __get_id_from_fullpath(filename) + + visited_no_tag_ids = sorted(list(set(visited_ids) - set(id_to_tags_dict.keys()))) + + # Unvisisted imageIds + unvisited_ids = sorted(list(set(all_ids) - set(visited_ids))) + + #TODO: A cleaner way to do this + all_class_name_lists = [] + unique_class_names = [] + for val in id_to_tags_dict.values(): + for v in val: + all_class_name_lists.append(v["classes"]) + for c in set(x for l in all_class_name_lists for x in l): + unique_class_names.append(c) + + return { + "totalNumImages" : len(all_ids), + "numImagesVisted" : len(visited_ids), + "numImagesVisitedNoTag": len(visited_no_tag_ids), + "numImagesNotVisted" : len(unvisited_ids), + "imagesVisited" : visited_ids, + "imagesNotVisited" : unvisited_ids, + "imagesVisitedNoTag": visited_no_tag_ids, + "imageIdToTags": id_to_tags_dict, + "uniqueClassNames": unique_class_names + } + + +def main(): + images = { + "1.png" : {}, + "2.png" : {}, + "3.png" : {}, + "4.png" : {}, + "5.png" : {} + } + generated_json = create_starting_vott_json(images) + print("generating starting default json for vott_parser download") + print(json.dumps(generated_json)) + + print('testing tag creation') + tag1 = __build_json_tag(122, 171, 122, 191, 488, 512, "uiduiduid", 2, "Rectangle", ["Ford", "Volvo", "BMW"],2) + print(tag1) + print(json.dumps(tag1)) + + print('testing adding two sets') + output_json = { + "frames" : { + "1.png": [], + "2.png": [tag1, tag1], + "3.png": [tag1], + "4.png": [], + "5.png": [] + }, + "visitedFrames": [] + } + print() + print('bare') + print(json.dumps(output_json)) + print() + print("Testing process_vott_json") + print(json.dumps(process_vott_json(output_json))) + print() + print(json.dumps(output_json)) + + # tag_data = __get_components_from_json_tag(output_json["frames"]["2"][0]) + # print("tag_data: ---" + str(tag_data)) + # add_tag_to_db('something', 2, (tag_data)) + + +# Currently only used for testing... +# returns a json representative of a tag given relevant components +def __build_json_tag(x1, x2, y1, y2, img_width, img_height, UID, id, type, tags, name): + return { + "x1": x1, + "x2": x2, + "y1": y1, + "y2": y2, + "width": img_width, + "height": img_height, + "box" : { + "x1": x1, + "x2": x2, + "y1": y1, + "y2": y2 + }, + "UID": UID, + "id": id, + "type": type, + "tags": tags, + "name": name + } + + +if __name__ == '__main__': + main()
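To make the input and output of `process_vott_json` concrete, here is a small usage sketch (run from the repository root so that `utils` and `functions` are importable; the file names and tag values are made up, but follow the conventions above, where each frame key ends in an integer image id and `visitedFrames` lists the frames a human has opened):

```python
from utils.vott_parser import process_vott_json

vott_json = {
    "frames": {
        "199.JPG": [
            {"x1": 3.0, "x2": 9.0, "y1": 4.0, "y2": 7.0,
             "UID": "a1b2c3d4", "id": 199, "type": "Rectangle",
             "tags": ["knot"], "name": 2}
        ],
        "211.JPG": []   # visited, but the annotator added no tags
    },
    "visitedFrames": ["199.JPG", "211.JPG"]
}

summary = process_vott_json(vott_json)
# summary["totalNumImages"]      -> 2
# summary["imagesVisitedNoTag"]  -> [211]
# summary["uniqueClassNames"]    -> ["knot"]
# summary["imageIdToTags"][199][0]["classes"] -> ["knot"]
```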