diff --git a/.gitignore b/.gitignore
index 181170d..0ebd687 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,9 @@
 # RStudio files
 .Rproj.user/
 
+/training_data/dapi/images/*.tif
+/training_data/dapi/masks/*.tif
+
 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf
diff --git a/augment.py b/augment.py
new file mode 100644
index 0000000..b57cb78
--- /dev/null
+++ b/augment.py
@@ -0,0 +1,52 @@
+import os
+import cv2
+
+# Set the paths for the input folders
+mask_org_folder = "./training_data/masks_nuclei"
+images_org_folder = "./training_data/images_nuclei"
+
+# Set the paths for the output folders
+mask_folder = "./training_data/dapi/masks"
+images_folder = "./training_data/dapi/images"
+
+# Create the output folders if they don't exist
+os.makedirs(mask_folder, exist_ok=True)
+os.makedirs(images_folder, exist_ok=True)
+
+# Set the ROI size and step
+roi_size = (256, 256)
+roi_step = 128
+
+# Get the list of file names in the mask_org folder
+file_names = [file for file in os.listdir(mask_org_folder) if file.lower().endswith(('.tif', '.tiff'))]
+
+# Iterate over the file names
+for file_name in file_names:
+    # Read the mask image
+    mask_path = os.path.join(mask_org_folder, file_name)
+    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
+
+    # Read the corresponding input image
+    image_path = os.path.join(images_org_folder, file_name)
+    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE | cv2.IMREAD_ANYDEPTH)
+
+    # Get the dimensions of the images
+    mask_height, mask_width = mask.shape[:2]
+    image_height, image_width = image.shape[:2]
+
+    # Iterate over the ROI positions
+    for y in range(0, mask_height - roi_size[0] + 1, roi_step):
+        for x in range(0, mask_width - roi_size[1] + 1, roi_step):
+            # Extract the ROI from the mask
+            roi_mask = mask[y:y+roi_size[0], x:x+roi_size[1]]
+
+            # Extract the ROI from the image
+            roi_image = image[y:y+roi_size[0], x:x+roi_size[1]]
+
+            # Save the ROI as a new image in the mask folder
+            new_mask_path = os.path.join(mask_folder, f"{file_name}_{y}_{x}.tif")
+            cv2.imwrite(new_mask_path, roi_mask)
+
+            # Save the ROI as a new image in the images folder
+            new_image_path = os.path.join(images_folder, f"{file_name}_{y}_{x}.tif")
+            cv2.imwrite(new_image_path, roi_image)
diff --git a/example_image.tif b/example_image.tif
new file mode 100644
index 0000000..37c3a26
Binary files /dev/null and b/example_image.tif differ
diff --git a/example_labels.tif b/example_labels.tif
new file mode 100644
index 0000000..02f24b8
Binary files /dev/null and b/example_labels.tif differ
diff --git a/example_rois.zip b/example_rois.zip
new file mode 100644
index 0000000..ea1d3bc
Binary files /dev/null and b/example_rois.zip differ
diff --git a/figure01.md b/figure01.md
index f75c09c..d02348c 100644
--- a/figure01.md
+++ b/figure01.md
@@ -2,6 +2,11 @@
 Daniel Fürth<br>

 Table of Contents:
 - [Emission plot Fig. 1c](#emission-plot-fig.-1c)
+- [DAPI nuclei cell segmentation in
+  2D.](#dapi-nuclei-cell-segmentation-in-2d.)
+  - [Installation](#installation)
+  - [Segmentation training](#segmentation-training)
+  - [Prediction](#prediction)
 
 ## Emission plot Fig. 1c
@@ -152,3 +157,110 @@
 round(AZdye594, 2)
 ```
     [1] 2.88
+
+## DAPI nuclei cell segmentation in 2D.
+
+### Installation
+
+Make sure you have `conda` installed. Create a new conda environment:
+
+```
+conda create --name cellseg2D-env
+conda activate cellseg2D-env
+conda install -c conda-forge napari
+conda install opencv
+```
+
+Install TensorFlow for macOS M1/M2:
+
+```
+pip install tensorflow-macos
+pip install tensorflow-metal
+```
+
+Install StarDist for cell nuclei segmentation:
+
+```
+pip install gputools
+pip install stardist
+pip install csbdeep
+```
+
+### Segmentation training
+
+#### Augment training data set
+
+```
+python augment.py
+```
+
+This tiles the full-size images in `images_nuclei` and `masks_nuclei`
+into overlapping 256×256 crops written to `dapi/images` (input) and
+`dapi/masks` (ground truth). Input and ground truth are matched based
+on file name. Both are 8-bit monochrome TIF.
+
+If more than 255 cells need to be segmented within a single image,
+simply change the mask format to 16-bit.
+
+#### Perform training
+
+```
+python train_nuclei.py
+```
+
+Open TensorBoard to follow the training:
+
+```
+tensorboard --logdir=.
+```
+
+![TensorBoard lets you monitor the training of the neural
+network](./repo_img/tensorboard.png)
+
+Click the **images** tab of TensorBoard to inspect the visual output
+of the training. Early in training it looks like this: ![inspect the
+output](./repo_img/tensorboard_img.png)
+
+Here:
+
+- `net_input` is the input images. Notice that a cell is only really
+  present in the first of the three.
+- `net_output0` is the current output from the network.
+- `net_target0` is the ground truth (what the network ideally should
+  have generated).
+
+### Prediction
+
+We have a script that can be applied to any image for prediction:
+
+```
+python predict_nuclei.py
+```
+
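+To segment an arbitrary image instead of the bundled training data,
+the trained model can also be loaded directly in Python. A minimal
+sketch of this use of the StarDist API, where `my_image.tif` is a
+placeholder for your own 2D DAPI image:
+
+```
+from tifffile import imread
+from csbdeep.utils import normalize
+from stardist.models import StarDist2D
+
+# load the trained model from ./models/dapi
+model = StarDist2D(None, name='dapi', basedir='models')
+
+# 'my_image.tif' is a placeholder; normalize to the 1-99.8 percentile
+# range, as in predict_nuclei.py
+img = normalize(imread('my_image.tif'), 1, 99.8)
+
+# labels: integer mask with one id per nucleus; details: polygons
+labels, details = model.predict_instances(img)
+print(labels.max(), "nuclei found")
+```
+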
+<div id="fig-dapi">
+
+![input](./repo_img/example_image.jpg)
+
+![output](./repo_img/example_labels.jpg)
+
+Figure 1: Segmentation results.
+
+</div>
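+
+The prediction script also writes `example_image.tif`,
+`example_labels.tif` and `example_rois.zip` (ImageJ ROIs) to the
+repository root. As a sketch of how the exported label image can be
+used downstream, nuclei can be counted and measured with plain NumPy:
+
+```
+import numpy as np
+from tifffile import imread
+
+labels = imread('example_labels.tif')
+
+# background is 0; each nucleus has its own integer id
+ids = np.unique(labels)
+ids = ids[ids != 0]
+print(f"{len(ids)} nuclei")
+
+# pixel area of each nucleus
+areas = {int(i): int(np.sum(labels == i)) for i in ids}
+```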
diff --git a/figure01.qmd b/figure01.qmd
index fd53dbf..e17cd68 100644
--- a/figure01.qmd
+++ b/figure01.qmd
@@ -118,3 +118,84 @@ AZdye594<-quench.ratio$`max(emission)`[3]/quench.ratio$`max(emission)`[4]
 round(AZdye488, 2)
 round(AZdye594, 2)
 ```
+
+## DAPI nuclei cell segmentation in 2D.
+
+### Installation
+
+Make sure you have `conda` installed.
+Create a new conda environment:
+
+```{eval=FALSE}
+conda create --name cellseg2D-env
+conda activate cellseg2D-env
+conda install -c conda-forge napari
+conda install opencv
+```
+
+Install TensorFlow for macOS M1/M2:
+
+```{eval=FALSE}
+pip install tensorflow-macos
+pip install tensorflow-metal
+```
+
+Install StarDist for cell nuclei segmentation:
+
+```{eval=FALSE}
+pip install gputools
+pip install stardist
+pip install csbdeep
+```
+
+### Segmentation training
+
+#### Augment training data set
+
+```{eval=FALSE}
+python augment.py
+```
+
+This tiles the full-size images in `images_nuclei` and `masks_nuclei` into overlapping 256×256 crops written to `dapi/images` (input) and `dapi/masks` (ground truth). Input and ground truth are matched based on file name. Both are 8-bit monochrome TIF.
+
+If more than 255 cells need to be segmented within a single image, simply change the mask format to 16-bit.
+
+#### Perform training
+
+```{eval=FALSE}
+python train_nuclei.py
+```
+
+Open TensorBoard to follow the training:
+
+```{eval=FALSE}
+tensorboard --logdir=.
+```
+
+![TensorBoard lets you monitor the training of the neural network](./repo_img/tensorboard.png)
+
+Click the **images** tab of TensorBoard to inspect the visual output of the training.
+Early in training it looks like this:
+![inspect the output](./repo_img/tensorboard_img.png)
+
+Here:
+
+- `net_input` is the input images. Notice that a cell is only really present in the first of the three.
+- `net_output0` is the current output from the network.
+- `net_target0` is the ground truth (what the network ideally should have generated).
+
+### Prediction
+
+We have a script that can be applied to any image for prediction:
+
+```{eval=FALSE}
+python predict_nuclei.py
+```
+
+::: {#fig-dapi layout-ncol=2}
+
+![input](./repo_img/example_image.jpg){#fig-input}
+
+![output](./repo_img/example_labels.jpg){#fig-output}
+
+Segmentation results.
+:::
diff --git a/models/dapi/config.json b/models/dapi/config.json
new file mode 100644
index 0000000..a5ff63a
--- /dev/null
+++ b/models/dapi/config.json
@@ -0,0 +1 @@
+{"n_dim": 2, "axes": "YXC", "n_channel_in": 1, "n_channel_out": 33, "train_checkpoint": "weights_best.h5", "train_checkpoint_last": "weights_last.h5", "train_checkpoint_epoch": "weights_now.h5", "n_rays": 32, "grid": [2, 2], "backbone": "unet", "n_classes": null, "unet_n_depth": 3, "unet_kernel_size": [3, 3], "unet_n_filter_base": 32, "unet_n_conv_per_depth": 2, "unet_pool": [2, 2], "unet_activation": "relu", "unet_last_activation": "relu", "unet_batch_norm": false, "unet_dropout": 0.0, "unet_prefix": "", "net_conv_after_unet": 128, "net_input_shape": [null, null, 1], "net_mask_shape": [null, null, 1], "train_shape_completion": false, "train_completion_crop": 32, "train_patch_size": [256, 256], "train_background_reg": 0.0001, "train_foreground_only": 0.9, "train_sample_cache": true, "train_dist_loss": "mae", "train_loss_weights": [1, 0.2], "train_class_weights": [1, 1], "train_epochs": 400, "train_steps_per_epoch": 100, "train_learning_rate": 0.0003, "train_batch_size": 4, "train_n_val_patches": null, "train_tensorboard": true, "train_reduce_lr": {"factor": 0.5, "patience": 40, "min_delta": 0}, "use_gpu": false}
\ No newline at end of file
diff --git a/models/dapi/logs/images/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.0.v2 b/models/dapi/logs/images/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.0.v2
new file mode 100644
index 0000000..12b091d
Binary files /dev/null and b/models/dapi/logs/images/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.0.v2 differ
diff --git a/models/dapi/logs/train/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.1.v2 b/models/dapi/logs/train/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.1.v2
new file mode 100644
index 0000000..3411dd6
Binary files /dev/null and b/models/dapi/logs/train/events.out.tfevents.1695132333.UU-N9KR2WKWCL.637.1.v2 differ
diff --git a/models/dapi/logs/validation/events.out.tfevents.1695132347.UU-N9KR2WKWCL.637.2.v2 b/models/dapi/logs/validation/events.out.tfevents.1695132347.UU-N9KR2WKWCL.637.2.v2
new file mode 100644
index 0000000..45359a2
Binary files /dev/null and b/models/dapi/logs/validation/events.out.tfevents.1695132347.UU-N9KR2WKWCL.637.2.v2 differ
diff --git a/models/dapi/thresholds.json b/models/dapi/thresholds.json
new file mode 100644
index 0000000..60382ea
--- /dev/null
+++ b/models/dapi/thresholds.json
@@ -0,0 +1 @@
+{"prob": 0.46905075238803473, "nms": 0.3}
\ No newline at end of file
diff --git a/models/dapi/weights_best.h5 b/models/dapi/weights_best.h5
new file mode 100644
index 0000000..bf96e8a
Binary files /dev/null and b/models/dapi/weights_best.h5 differ
diff --git a/models/dapi/weights_last.h5 b/models/dapi/weights_last.h5
new file mode 100644
index 0000000..4a590bd
Binary files /dev/null and b/models/dapi/weights_last.h5 differ
diff --git a/predict_nuclei.py b/predict_nuclei.py
new file mode 100644
index 0000000..a81aa31
--- /dev/null
+++ b/predict_nuclei.py
@@ -0,0 +1,35 @@
+from __future__ import print_function, unicode_literals, absolute_import, division
+import sys
+import numpy as np
+import matplotlib
+matplotlib.rcParams["image.interpolation"] = 'none'
+import matplotlib.pyplot as plt
+
+from glob import glob
+from tifffile import imread
+from csbdeep.utils import Path, normalize
+from csbdeep.io import save_tiff_imagej_compatible
+
+from stardist import random_label_cmap, _draw_polygons, export_imagej_rois
+from stardist.models import StarDist2D
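+
+# Predict nuclei on the nuclei training images using the trained
+# 'dapi' model: normalize each image to its 1-99.8 percentile range,
+# run StarDist instance segmentation, and export the result as
+# ImageJ-compatible TIFFs plus an ROI zip.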
+
+np.random.seed(6)
+lbl_cmap = random_label_cmap()
+
+X = sorted(glob('./training_data/images_nuclei/*.tif'))
+X = list(map(imread,X))
+
+n_channel = 1 if X[0].ndim == 2 else X[0].shape[-1]
+axis_norm = (0,1)   # normalize channels independently
+# axis_norm = (0,1,2) # normalize channels jointly
+if n_channel > 1:
+    print("Normalizing image channels %s." % ('jointly' if axis_norm is None or 2 in axis_norm else 'independently'))
+
+model = StarDist2D(None, name='dapi', basedir='models')
+
+img = normalize(X[0], 1,99.8, axis=axis_norm)
+labels, details = model.predict_instances(img)
+
+save_tiff_imagej_compatible('example_image.tif', img, axes='YX')
+save_tiff_imagej_compatible('example_labels.tif', labels, axes='YX')
+export_imagej_rois('example_rois.zip', details['coord'])
diff --git a/repo_img/example_image.jpg b/repo_img/example_image.jpg
new file mode 100644
index 0000000..79e1ce2
Binary files /dev/null and b/repo_img/example_image.jpg differ
diff --git a/repo_img/example_labels.jpg b/repo_img/example_labels.jpg
new file mode 100644
index 0000000..2b68f31
Binary files /dev/null and b/repo_img/example_labels.jpg differ
diff --git a/repo_img/tensorboard.png b/repo_img/tensorboard.png
new file mode 100644
index 0000000..a0fa004
Binary files /dev/null and b/repo_img/tensorboard.png differ
diff --git a/repo_img/tensorboard_img.png b/repo_img/tensorboard_img.png
new file mode 100644
index 0000000..5ced3b2
Binary files /dev/null and b/repo_img/tensorboard_img.png differ
diff --git a/repo_img/tensorboard_later.png b/repo_img/tensorboard_later.png
new file mode 100644
index 0000000..3efdcb4
Binary files /dev/null and b/repo_img/tensorboard_later.png differ
diff --git a/train.py b/train.py
deleted file mode 100644
index e69de29..0000000
diff --git a/train_nuclei.py b/train_nuclei.py
new file mode 100644
index 0000000..f9b8ad3
--- /dev/null
+++ b/train_nuclei.py
@@ -0,0 +1,115 @@
+from __future__ import print_function, unicode_literals, absolute_import, division
+import sys
+import numpy as np
+import matplotlib
+matplotlib.rcParams["image.interpolation"] = 'none'
+import matplotlib.pyplot as plt
+
+from glob import glob
+from tqdm import tqdm
+from tifffile import imread
+from csbdeep.utils import Path, normalize
+
+from stardist import fill_label_holes, random_label_cmap, calculate_extents, gputools_available
+from stardist.matching import matching, matching_dataset
+from stardist.models import Config2D, StarDist2D, StarDistData2D
+
+X = sorted(glob('training_data/dapi/images/*.tif'))
+Y = sorted(glob('training_data/dapi/masks/*.tif'))
+assert all(Path(x).name==Path(y).name for x,y in zip(X,Y))
+
+X = list(map(imread,X))
+Y = list(map(imread,Y))
+n_channel = 1 if X[0].ndim == 2 else X[0].shape[-1]
+
+axis_norm = (0,1)   # normalize channels independently
+# axis_norm = (0,1,2) # normalize channels jointly
+if n_channel > 1:
+    print("Normalizing image channels %s." % ('jointly' if axis_norm is None or 2 in axis_norm else 'independently'))
+    sys.stdout.flush()
+
+X = [normalize(x,1,99.8,axis=axis_norm) for x in tqdm(X)]
+Y = [fill_label_holes(y) for y in tqdm(Y)]
+
+
+assert len(X) > 1, "not enough training data"
+rng = np.random.RandomState(42)
+ind = rng.permutation(len(X))
+n_val = max(1, int(round(0.15 * len(ind))))
+ind_train, ind_val = ind[:-n_val], ind[-n_val:]
+X_val, Y_val = [X[i] for i in ind_val]  , [Y[i] for i in ind_val]
+X_trn, Y_trn = [X[i] for i in ind_train], [Y[i] for i in ind_train]
+print('number of images: %3d' % len(X))
+print('- training:       %3d' % len(X_trn))
+print('- validation:     %3d' % len(X_val))
+
+
+# 32 is a good default choice (see 1_data.ipynb)
+n_rays = 32
+
+# Use OpenCL-based computations for data generator during training (requires 'gputools')
+use_gpu = False and gputools_available()
+
+# Predict on subsampled grid for increased efficiency and larger field of view
+grid = (2,2)
+
+conf = Config2D (
+    n_rays       = n_rays,
+    grid         = grid,
+    use_gpu      = use_gpu,
+    n_channel_in = n_channel,
+)
+print(conf)
+vars(conf)
+
+
+if use_gpu:
+    from csbdeep.utils.tf import limit_gpu_memory
+    # adjust as necessary: limit GPU memory to be used by TensorFlow to leave some to OpenCL-based computations
+    limit_gpu_memory(0.8)
+    # alternatively, try this:
+    # limit_gpu_memory(None, allow_growth=True)
+
+
+model = StarDist2D(conf, name='dapi', basedir='models')
+
+median_size = calculate_extents(list(Y), np.median)
+fov = np.array(model._axes_tile_overlap('YX'))
+print(f"median object size:      {median_size}")
+print(f"network field of view :  {fov}")
+if any(median_size > fov):
+    print("WARNING: median object size larger than field of view of the neural network.")
+
+def random_fliprot(img, mask):
+    assert img.ndim >= mask.ndim
+    axes = tuple(range(mask.ndim))
+    perm = tuple(np.random.permutation(axes))
+    img = img.transpose(perm + tuple(range(mask.ndim, img.ndim)))
+    mask = mask.transpose(perm)
+    for ax in axes:
+        if np.random.rand() > 0.5:
+            img = np.flip(img, axis=ax)
+            mask = np.flip(mask, axis=ax)
+    return img, mask
+
+def random_intensity_change(img):
+    img = img*np.random.uniform(0.6,2) + np.random.uniform(-0.2,0.2)
+    return img
+
+
+def augmenter(x, y):
+    """Augmentation of a single input/label image pair.
+    x is an input image
+    y is the corresponding ground-truth label image
+    """
+    x, y = random_fliprot(x, y)
+    x = random_intensity_change(x)
+    # add some gaussian noise
+    sig = 0.02*np.random.uniform(0,1)
+    x = x + sig*np.random.normal(0,1,x.shape)
+    return x, y
+
+model.train(X_trn, Y_trn, validation_data=(X_val,Y_val), augmenter=augmenter)
+
+model.optimize_thresholds(X_val, Y_val)
+
diff --git a/training_data/images_cytosol/CMYC_6.tif b/training_data/images_cytosol/CMYC_6.tif
new file mode 100644
index 0000000..5677778
Binary files /dev/null and b/training_data/images_cytosol/CMYC_6.tif differ
diff --git a/training_data/images_cytosol/CMYC_MAX_4.tif b/training_data/images_cytosol/CMYC_MAX_4.tif
new file mode 100644
index 0000000..3a440dd
Binary files /dev/null and b/training_data/images_cytosol/CMYC_MAX_4.tif differ
diff --git a/training_data/images_cytosol/MAX_5.tif b/training_data/images_cytosol/MAX_5.tif
new file mode 100644
index 0000000..361bc8e
Binary files /dev/null and b/training_data/images_cytosol/MAX_5.tif differ
diff --git a/training_data/images_cytosol/NEG_3.tif b/training_data/images_cytosol/NEG_3.tif
new file mode 100644
index 0000000..7d2508e
Binary files /dev/null and b/training_data/images_cytosol/NEG_3.tif differ
diff --git a/training_data/images_nuclei/CMYC_6.tif b/training_data/images_nuclei/CMYC_6.tif
new file mode 100644
index 0000000..ec70d1a
Binary files /dev/null and b/training_data/images_nuclei/CMYC_6.tif differ
diff --git a/training_data/images_nuclei/CMYC_MAX_4.tif b/training_data/images_nuclei/CMYC_MAX_4.tif
new file mode 100644
index 0000000..5c8fe52
Binary files /dev/null and b/training_data/images_nuclei/CMYC_MAX_4.tif differ
diff --git a/training_data/images_nuclei/MAX_5.tif b/training_data/images_nuclei/MAX_5.tif
new file mode 100644
index 0000000..7c7ca2e
Binary files /dev/null and b/training_data/images_nuclei/MAX_5.tif differ
diff --git a/training_data/images_nuclei/NEG_3.tif b/training_data/images_nuclei/NEG_3.tif
new file mode 100644
index 0000000..32119ef
Binary files /dev/null and b/training_data/images_nuclei/NEG_3.tif differ
diff --git a/training_data/phalloidin_MAX_CMYC_6_img.tif b/training_data/masks_cytosol/CMYC_6.tif
similarity index 100%
rename from training_data/phalloidin_MAX_CMYC_6_img.tif
rename to training_data/masks_cytosol/CMYC_6.tif
diff --git a/training_data/phalloidin_CMYC_MAX_4_img.tif b/training_data/masks_cytosol/CMYC_MAX_4.tif
similarity index 100%
rename from training_data/phalloidin_CMYC_MAX_4_img.tif
rename to training_data/masks_cytosol/CMYC_MAX_4.tif
diff --git a/training_data/phalloidin_MAX_MAX_5_img.tif b/training_data/masks_cytosol/MAX_5.tif
similarity index 100%
rename from training_data/phalloidin_MAX_MAX_5_img.tif
rename to training_data/masks_cytosol/MAX_5.tif
diff --git a/training_data/phalloidin_NEG_3_img.tif b/training_data/masks_cytosol/NEG_3.tif
similarity index 100%
rename from training_data/phalloidin_NEG_3_img.tif
rename to training_data/masks_cytosol/NEG_3.tif
diff --git a/training_data/dapi_MAX_CMYC_6_img.tif b/training_data/masks_nuclei/CMYC_6.tif
similarity index 100%
rename from training_data/dapi_MAX_CMYC_6_img.tif
rename to training_data/masks_nuclei/CMYC_6.tif
diff --git a/training_data/dapi_CMYC_MAX_4_img.tif b/training_data/masks_nuclei/CMYC_MAX_4.tif
similarity index 100%
rename from training_data/dapi_CMYC_MAX_4_img.tif
rename to training_data/masks_nuclei/CMYC_MAX_4.tif
diff --git a/training_data/dapi_MAX_MAX_5_img.tif b/training_data/masks_nuclei/MAX_5.tif
similarity index 100%
rename from training_data/dapi_MAX_MAX_5_img.tif
rename to training_data/masks_nuclei/MAX_5.tif
diff --git a/training_data/dapi_NEG_3_img.tif b/training_data/masks_nuclei/NEG_3.tif
similarity index 100%
rename from training_data/dapi_NEG_3_img.tif
rename to training_data/masks_nuclei/NEG_3.tif