diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d33cdb --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# End of https://www.toptal.com/developers/gitignore/api/python \ No newline at end of file diff --git a/lumos/__init__.py b/lumos/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lumos/generator.py b/lumos/generator.py new file mode 100644 index 0000000..3fd99c2 --- /dev/null +++ b/lumos/generator.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" Main functions to generate platemaps with lumos + +""" + +import os +from pathlib import Path, PureWindowsPath +import pandas as pd +from shutil import copyfile +import shutil +from . import toolbox +from . import parameters +import cv2 +from tqdm import tqdm +import logging +import numpy as np + + +def generate_plate_image( + plate_input_path_string, + plate_name, + channel_to_render, + channel_label, + temp_folder, +): + """Generate an image of a cellpainting plate for a specific channel + Args: + plate_images_folder_path : The folder where the images of the plate are stored + plate_name: name of the plate + channel_to_render: the cellpainting channel to render + channel_label: The text describing the channel type + temp_folder_path: The folder where temporary data can be stored + Return: 8 bit cv2 image + """ + + # define a temp folder for the run + temp_folder = temp_folder + "/tmpgen-" + plate_name + channel_to_render + + # remove temp dir if existing + shutil.rmtree(temp_folder, ignore_errors=True) + + # create the temporary directory structure to work on images + try: + os.mkdir(temp_folder) + os.mkdir(temp_folder + "/wells") + + except FileExistsError: + pass + + # read the plate input path + plate_input_path = Path(PureWindowsPath(plate_input_path_string)) + + # get the files from the plate folder, for the targeted channel + images_full_path_list = list( + Path(plate_input_path).glob("*" + channel_to_render + ".tif") + ) + + # check that we get 2304 images for a 384 well image + try: + assert len(images_full_path_list) == 2304 + except AssertionError: + print( + "The plate does not have the exact image count expected", + len(images_full_path_list), + ) + + logging.info( + "Start plate image generation for channel:" + + str(channel_to_render) + + " " + + str(channel_label) + ) + + # get the filenames list + images_full_path_list.sort() + images_filename_list = [str(x.name) for x in images_full_path_list] + + # get the well list + image_well_list = [x.split("_")[1].split("_T")[0] for x in images_filename_list] + + # get the siteid list (sitesid from 1 to 6) + image_site_list = [ + x.split("_T0001F")[1].split("L")[0] for x in images_filename_list + ] + image_site_list_int = [int(x) for x in image_site_list] + + # zip all in a data structure + image_data_zip = zip( + image_well_list, + image_site_list_int, + images_filename_list, + images_full_path_list, + ) + + # convert the zip into dataframe + data_df = pd.DataFrame( + list(image_data_zip), columns=["well", "site", "filename", "fullpath"] + ) + + # get the theoretical well list for 384 well plate + well_theoretical_list = [ + l + str(r).zfill(2) for l in "ABCDEFGHIJKLMNOP" for r in range(1, 25) + ] + well_site_theoretical_list = [ + [x, r] for x in well_theoretical_list for r in range(1, 7) + ] + + # create the theoretical well dataframe + theoretical_data_df = pd.DataFrame( + well_site_theoretical_list, columns=["well", "site"] + ) + + # join the real wells with the theoric ones + theoretical_data_df_joined = theoretical_data_df.merge( + data_df, + left_on=["well", "site"], + right_on=["well", "site"], + how="left", + ) + + # log if there is a delta between theory and actual plate wells + delta = set(well_theoretical_list) - set(image_well_list) + logging.info("Well Delta " + str(delta)) + + # promote theoretical over actual + data_df = theoretical_data_df_joined + + # get the site images and store them locally + logging.info("Copy sources images in temp folder..") + + for index, current_image in tqdm( + data_df.iterrows(), + desc="Download images to temp", + unit="images", + colour="#006464", + leave=True, + ): + # do not copy if file exists + if not os.path.isfile(temp_folder + "/" + str(current_image["filename"])): + try: + copyfile( + current_image["fullpath"], + temp_folder + "/" + str(current_image["filename"]), + ) + except TypeError: + logging.warning( + "TypeError:" + + str(current_image["fullpath"]) + + str(temp_folder) + + "/" + + str(current_image["filename"]) + ) + + # get the well set + well_list = list(set(data_df["well"])) + well_list.sort() + + logging.info("Generate Well images") + + # generate one image per well by concatenation of image sites + wellprogressbar = tqdm(list(well_list), unit="wells", leave=False) + for current_well in wellprogressbar: + + wellprogressbar.set_description("Processing well %s" % current_well) + # get the 6 images metadate of the well + current_wells_df = data_df[data_df["well"] == current_well] + + # load 6 wells into an image list + image_list = [] + for current_site in range(1, 7): + img = toolbox.load_site_image(current_site, current_wells_df, temp_folder) + try: + img.shape + except: + # create blank file + img = np.full((1000, 1000, 1), 32768, np.uint16) + logging.warning("Missing file in well" + current_well) + + image_list.append(img) + + # clip and rescale each image individualy + rescaled_image_list = [] + for img in image_list: + img_norm = img * parameters.channel_coefficients[channel_to_render] + rescaled_image_list.append(img_norm) + + # concatenate horizontally and vertically + sites_row1 = cv2.hconcat( + [rescaled_image_list[0], rescaled_image_list[1], rescaled_image_list[2]] + ) + sites_row2 = cv2.hconcat( + [rescaled_image_list[3], rescaled_image_list[4], rescaled_image_list[5]] + ) + all_sites_image = cv2.vconcat([sites_row1, sites_row2]) + all_sites_image_norm = all_sites_image + + # convert to 8 bit + # comment the following line to generate interesting images + all_sites_image = all_sites_image_norm / 256 + all_sites_image = all_sites_image.astype("uint8") + + # add well id on image + text = current_well + " " + channel_label + font = cv2.FONT_HERSHEY_SIMPLEX + cv2.putText( + all_sites_image, + text, + (25, 125), + font, + 4, + (192, 192, 192), + 8, + cv2.INTER_AREA, + ) + + # add well marks on borders + image_shape = all_sites_image.shape + cv2.rectangle( + all_sites_image, + (0, 0), + (image_shape[1], image_shape[0]), + (192, 192, 192), + 8, + ) + # resize + all_sites_image_resized = cv2.resize( + src=all_sites_image, + dsize=None, + fx=parameters.rescale_ratio, + fy=parameters.rescale_ratio, + interpolation=cv2.INTER_CUBIC, + ) + # save + cv2.imwrite( + temp_folder + "/wells/well-" + str(current_well) + ".png", + all_sites_image_resized, + ) + + # load all well images and store images in memory into a list + print("Load well images in memory..") + logging.info("Generate Well images") + + image_well_data = [] + for current_well in list(well_list): + well_image = toolbox.load_well_image( + current_well, + temp_folder + "/wells", + ) + image_well_data.append(well_image) + + # concatenate all the well images into horizontal stripes (1 per row) + logging.info("Concatenate images into a plate..") + + image_row_data = [] + for current_plate_row in range(1, 17): + + # concatenate horizontally and vertically + well_start_id = ((current_plate_row - 1) * 24) + 0 + well_end_id = current_plate_row * 24 + sites_row = cv2.hconcat(image_well_data[well_start_id:well_end_id]) + image_row_data.append(sites_row) + + # concatenate all the stripes into 1 image + plate_image = cv2.vconcat(image_row_data) + + # purge temp files + logging.info("Purge temporary folder") + + shutil.rmtree(temp_folder, ignore_errors=True) + + return plate_image + + +def render_single_channel_plateview( + source_path, plate_name, channel, channel_label, output_path, temp_folder +): + """Render 1 image for a specific plate channel + args: + source_path: the source folder containing plate images + plate_name: name of the plate + channel: the code of the channel to render (e.g. C02) + channel_label: the text detail of the channel + output_path: the folder where to save the image + temp_folder: temporary work folder + returns: true in case of success + """ + + # generate cv2 image for the channel + plate_image = generate_plate_image( + source_path, + plate_name, + channel, + channel_label, + temp_folder, + ) + # save image + plate_image_path = ( + output_path + + "/" + + plate_name + + "-" + + str(channel) + + "-" + + str(parameters.channel_coefficients[channel]) + + ".jpg" + ) + print("Generated image of size:", plate_image.shape) + print("Saved as ", plate_image_path) + + cv2.imwrite(plate_image_path, plate_image) + + return + + +def render_single_plate_plateview( + source_path, + plate_name, + channel_list, + channel_details_dict, + output_path, + temp_folder, +): + """Render images (1 per channel) for a specific plate + args: + source_path: the source folder containing plate images + plate_name: name of the plate + channel_list: The list of channels to render + channel_details_dict: channel details stored in a dict + output_path: the folder where to save the images + temp_folder: temporary work folder + returns: true in case of success + """ + for current_channel in tqdm( + channel_list, + desc="Render plate channels", + unit="channel plateview", + colour="green", + bar_format=None, + ): + print("Generate", current_channel, channel_details_dict[current_channel]) + + render_single_channel_plateview( + source_path, + plate_name, + current_channel, + channel_details_dict[current_channel], + output_path, + temp_folder, + ) + + return + + +def render_single_run_plateview( + source_path, + folder_list, + channel_list, + channel_details_dict, + output_path, + temp_folder, +): + """Render images for all plates of a run + args: + source_path: the source folder containing plate images + folder_list: name of the plates and their respective path inside a dict + channel_list: The list of channels to render for each plate + channel_details_dict: channel details stored in a dict + output_path: the folder where to save the images + temp_folder: temporary work folder + returns: true in case of success + """ + + for current_plate in folder_list.keys(): + print("Render", current_plate) + + # render all the channels of the plate + render_single_plate_plateview( + folder_list[current_plate], + current_plate, + parameters.default_per_plate_channel_to_render, + parameters.cellplainting_channels_dict, + output_path, + temp_folder, + ) + + return diff --git a/lumos/parameters.py b/lumos/parameters.py new file mode 100644 index 0000000..54325f2 --- /dev/null +++ b/lumos/parameters.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +cellplainting_channels = [ + ["C01 DNA Hoechst 33342", "C01"], + ["C02 ER Concanavalin A", "C02"], + ["C03 RNA SYTO 14", "C03"], + ["C04 AGP Phalloidin and WGA", "C04"], + ["C05 MITO MitoTracker", "C05"], + ["C06 Brigtfield depth1", "Z01C06"], + ["C06 Brigtfield depth2", "Z02C06"], + ["C06 Brigtfield depth3", "Z03C06"], +] + +cellplainting_channels_dict = { + "C01": "DNA Hoechst 33342", + "C02": "ER Concanavalin A", + "C03": "RNA SYTO 14", + "C04": "AGP Phalloidin and WGA", + "C05": "MITO MitoTracker Deep Red", + "Z01C06": "Brightfield depth1", + "Z02C06": "Brightfield depth2", + "Z03C06": "Brightfield depth3", +} + +# what is the default channel to render for a single channel rendering +default_channel_to_render = cellplainting_channels[0][1] + +# what are the channel to render a singleplate rendering +default_per_plate_channel_to_render = [ + "C01", + "C02", + "C03", + "C04", + "C05", + "Z01C06", + "Z02C06", + "Z03C06", +] + +# rescale coefficient factors per channel +channel_coefficients = { + "C01": 16, + "C02": 8, + "C03": 8, + "C04": 8, + "C05": 8, + "Z01C06": 8, + "Z02C06": 8, + "Z03C06": 8, +} + +clipping_threshold_min_value = 1 +clipping_threshold_max_value = 12000 +normalize_alpha = 0 +normalize_beta = 65535 +rescale_ratio = 0.1 diff --git a/lumos/toolbox.py b/lumos/toolbox.py new file mode 100644 index 0000000..546dabb --- /dev/null +++ b/lumos/toolbox.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Extra helper functions for lumos +""" +import cv2 +import pandas as pd +import os +import logging + + +def load_site_image(site, current_wells_df, source_folder): + """load a site image, for a specific channel + Args: + site: the id of the site (between 1 and 6) + current_wells_df: the dataframe containing image metadata + source_folder: the path where the images are stored + Return: + a cv2 image + """ + + for id, current_site in current_wells_df.iterrows(): + # process field 1 + if current_site["site"] == site: + if not os.path.isfile(source_folder + "/" + str(current_site["filename"])): + logging.info("warning,: path does not exist") + site_img = cv2.imread( + source_folder + "/" + str(current_site["filename"]), -1 + ) + + try: + site_img.shape + except: + logging.info("Load Error") + return + + return site_img + + +def load_well_image(well, source_folder): + """load a wellimage, for a specific channel + Well images are temporary images made by lumos for a specific channel + Args: + well: the id of the well (e.g. A23) + source_folder: the path where the images are stored + + Return: + a cv2 image + """ + well_image_path = source_folder + "/well-" + str(well) + ".png" + if not os.path.isfile(well_image_path): + logging.info("warning,: path does not exist") + well_img = cv2.imread(well_image_path) + + try: + well_img.shape + except: + logging.info("Load Error") + + return well_img diff --git a/lumoscli.py b/lumoscli.py new file mode 100644 index 0000000..43a6f7b --- /dev/null +++ b/lumoscli.py @@ -0,0 +1,182 @@ +"""Lumos command line interface module""" + +import tempfile +from pathlib import Path +import os +import fnmatch +import logging + +import click +from art import text2art +from lumos import parameters +from lumos.generator import ( + render_single_channel_plateview, + render_single_plate_plateview, + render_single_run_plateview, +) + + + +# print lumos header +header_ascii_art = text2art("Lumos", font="big") +print(header_ascii_art) + +# initialize a temp dir location +default_temp_directory = tempfile.gettempdir() + +# activate log in temporary directory +logging.basicConfig( + filename=default_temp_directory +"lumos.log", + level=logging.DEBUG, + format="%(asctime)s %(message)s" +) + +# collect parameters +cellpaintingchannels = [x[1] for x in parameters.cellplainting_channels] + +@click.command() +@click.option( + "--scope", + type=click.Choice(["run", "plate", "channel"], case_sensitive=False), + help="If you want to generate a plateview from a single channel, plate or whole run", +) +@click.option( + "--channel", + type=click.Choice(cellpaintingchannels, case_sensitive=True), + help="For single channel render only", +) +@click.option( + "--source-path", + type=click.Path(exists=True), + help="Folder of your run or single plate", +) +@click.option( + "--output-path", + type=click.Path(exists=True), + help="Folder where images will be output", +) +@click.option( + "--temp-path", + type=click.Path(exists=True), + help="Temporary working folder path. Default will be: " + + str(default_temp_directory), + default=default_temp_directory, +) +def start(scope, channel, source_path, output_path, temp_path): + """Lumos CLI - Cell painting plate image generator + """ + + # annpunce startup + logging.info("Started") + + # Check conditions to process run, plate or single channel + if not scope: + click.echo("Please define a scope") + return + + if scope == "run": + + if channel: + click.echo( + "--channel argument must not be used for run generation. please remove it" + ) + return + + # get run name + run_name = Path(source_path) + + # get plates and their path, only if files in it + click.echo("Scan to detect plate folders..") + + run_folder_list = Path(source_path).glob("**") + + # create a dict with plate name as key and plate folder path as value + # only folders with tif images are eligible + folder_list = { + x.name: x + for x in run_folder_list + if (x.is_dir() and len(fnmatch.filter(os.listdir(x), "*.tif"))) + } + + click.echo( + "Lumos will process " + + str(len(folder_list)) + + " Folders from run:" + + str(run_name) + ) + click.echo("PLates: " + str(list(folder_list.keys()))) + + # generate + # render all the plates of the run + render_single_run_plateview( + source_path, + folder_list, + parameters.default_per_plate_channel_to_render, + parameters.cellplainting_channels_dict, + output_path, + temp_path, + ) + + elif scope == "plate": + + if channel: + click.echo( + "--channel argument must not be used for plate generation. please remove it" + ) + return + + # get platename + plate_name = Path(source_path).name + click.echo( + "Process plate: " + + plate_name + + " and render channels: " + + str(parameters.default_per_plate_channel_to_render), + ) + + # render all the channels of the plate + render_single_plate_plateview( + source_path, + plate_name, + parameters.default_per_plate_channel_to_render, + parameters.cellplainting_channels_dict, + output_path, + temp_path, + ) + + elif scope == "channel": + + if not channel: + click.echo("Missing channel, please define a channel") + return + if not source_path: + click.echo("Missing source plate path, please define a path") + else: + + # get platename + plate_name = Path(source_path).name + click.echo( + "Process plate:" + + plate_name + + " channel:" + + channel + + " " + + str(parameters.cellplainting_channels_dict[channel]), + ) + + # render the channel for the plate + render_single_channel_plateview( + source_path, + plate_name, + channel, + parameters.cellplainting_channels_dict[channel], + output_path, + temp_path, + ) + + # annpunce stop + logging.info("Stop") + + +if __name__ == "__main__": + start() diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..df637a4 --- /dev/null +++ b/readme.md @@ -0,0 +1,23 @@ +# Lumos + +Lumos is a first version of a python script to aggregate pictures obtained from cellpainting assay. This version of lumos fits with images generated with Yokogawa CV8000 High content analyis system. + +Your images must be accessible from your filesystem. + +## Setup +Create a virtualenv & activate it + + python -m venv venv + source venv/bin/activate + +Install depenencies & Install lumos + + pip install -r requirements.txt + python setup.py install + + +## Use + +From your prompt, launch help to see the arguments required. + + lumos --help diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..057bcdf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +opencv-python==4.5.4.60 +pandas==1.3.5 +tqdm==4.62.3 +click==8.0.3 +art==5.4 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2eee9c9 --- /dev/null +++ b/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup, find_packages + +with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() +with open("requirements.txt", "r", encoding="utf-8") as fh: + requirements = fh.read() +setup( + name = 'lumos', + version = '0.0.2', + author = 'DSDM', + author_email = '', + license = 'NOLICENCE', + description = 'A cell painting plateviewer', + long_description = long_description, + long_description_content_type = "text/markdown", + url = 'https://github.com/nicolasboisseau/lumos', + py_modules = ['lumoscli','lumos'], + packages = find_packages(), + install_requires = [requirements], + python_requires='>=3.8', + classifiers=[ + "Programming Language :: Python :: 3.8", + "Operating System :: OS Independent", + ], + entry_points = ''' + [console_scripts] + lumos=lumoscli:start + ''' +) \ No newline at end of file diff --git a/tests/test_load_image.py b/tests/test_load_image.py new file mode 100644 index 0000000..670dd52 --- /dev/null +++ b/tests/test_load_image.py @@ -0,0 +1,44 @@ +import pytest +import cv2 +import tempfile +import numpy as np +import pandas as pd +from pathlib import Path +import os +from lumos.toolbox import load_site_image + +# Arrange +@pytest.fixture +def fake_image(): + """ Generate a fake image and save it in temp folder + """ + img = np.full((1000, 1000, 1), 32768, np.uint16) + + fake_16_bit_image_file = tempfile.NamedTemporaryFile(delete=False,suffix='.tif') + + cv2.imwrite( + fake_16_bit_image_file.name, + img, + ) + + return fake_16_bit_image_file.name + +@pytest.fixture +def fake_dataframe(fake_image): + """ generate a dataframe with the following columns ["well", "site", "filename", "fullpath"] + + """ + image_path = Path(fake_image) + name = image_path.name + parent = image_path.parent + dict = [{"well":"A01", "site":1, "filename":image_path.name, "fullpath":image_path.parent}] + fake_df = pd.DataFrame.from_dict(dict,orient='columns') + return fake_df + +def test_load_image(fake_dataframe,fake_image): + # Act + fake_image_path = Path(fake_image) + site_image = load_site_image(1,fake_dataframe,str(fake_image_path.parent)) + + # Assert + assert site_image.shape == (1000,1000) \ No newline at end of file