Source code for pysmFISH.counting

import pickle
import numpy as np
from skimage import io, img_as_float
from .dots_calling import thr_calculator
from .filtering import smFISH_filtering, nuclei_filtering



def filtering_and_counting(fpath_img_to_filter, filtered_png_img_gene_dirs,
                           filtered_img_gene_dirs, counting_gene_dirs,
                           illumination_correction=False, plane_keep=None,
                           min_distance=5, stringency=0,
                           skip_genes_counting=None, skip_tags_counting=None):
    """
    Filter one image stack and, unless the gene is skipped, count the
    smFISH dots in it.

    The file name (last '/'-separated component of the path) is split on
    '_': fields 0, 1 and 2 are used as experiment name, hybridization and
    gene, and field 4 (up to the first '.') as the position.
    The filtered image is saved as .png (clipped at 1 and rescaled to
    uint16) and as .npy (unscaled filter output). The counting dictionary,
    when produced and non-empty, is pickled to a .pkl file. All the outputs
    are named <experiment>_<hyb>_<gene>_pos_<pos>.<ext>.

    Parameters:
    ------------

    fpath_img_to_filter: str
        path to the .npy file to process.
    filtered_png_img_gene_dirs: list
        list of the paths of the directories where the filtered images
        are saved as pngs.
    filtered_img_gene_dirs: list
        list of the paths of the directories where the filtered images
        are saved as .npy.
    counting_gene_dirs: list
        list of the paths of the directories where the countings of the
        filtered images are saved.
    illumination_correction: bool
        not used by this function; kept so existing callers keep working.
    plane_keep: list
        start and end point of the z-planes to keep (ex. [2,-3]).
        Anything that is not a list (default None) keeps all the planes.
    min_distance: int
        minimum distance between dots.
    stringency: int
        stringency use to select the threshold used for counting.
    skip_genes_counting: list
        list of the genes to skip for counting. None means no gene
        is skipped.
    skip_tags_counting: list
        list of the tags inside the genes/stainings name to avoid to count.
        None means no tag is skipped.

    Raises:
    ------------

    IndexError
        if the file name has fewer than five '_'-separated fields or if no
        saving directory matches the gene.

    """

    # The documented default is None: treat it as "nothing to skip" instead
    # of failing on the membership test / iteration below.
    if skip_genes_counting is None:
        skip_genes_counting = []
    if skip_tags_counting is None:
        skip_tags_counting = []

    # Get infos from file name
    fname_split = fpath_img_to_filter.split('/')[-1].split('_')
    experiment_name = fname_split[0]
    hyb = fname_split[1]
    gene = fname_split[2]
    pos = fname_split[4].split('.')[0]

    # Load the image to process and convert it to float
    img_stack = np.load(fpath_img_to_filter)
    img_stack = img_as_float(img_stack)

    # Remove extra planes (slice along the first axis only)
    if isinstance(plane_keep, list):
        img_stack = img_stack[plane_keep[0]:plane_keep[1], :, :]

    # Filtering image according to gene
    skip_counting = (gene in skip_genes_counting
                     or any(tag in gene for tag in skip_tags_counting))
    if skip_counting:
        # Staining that is not counted: use the nuclei filtering
        img_filtered = nuclei_filtering(img_stack)
        counting_dict = None
    else:
        # smFISH filtering followed by dots counting on the whole image
        img_filtered = smFISH_filtering(img_stack)
        counting_dict = thr_calculator(img_filtered, min_distance, stringency)

    # Keep the unscaled image: the conversion below works in place
    img_filtered_original = img_filtered.copy()

    # Convert image to uint16: clip the values above 1, scale to the max
    # of the uint16, then round and convert to integer
    img_filtered[img_filtered > 1] = 1
    img_filtered *= np.iinfo(np.uint16).max
    img_filtered = np.uint16(np.rint(img_filtered))

    # Identify the directories for storing the images. A directory matches
    # when the gene name is contained in its second to last '/'-separated
    # component; the first match is used.
    img_saving_dir_npy = [saving_dir for saving_dir in filtered_img_gene_dirs
                          if gene in saving_dir.split('/')[-2]][0]
    img_saving_dir_png = [saving_dir for saving_dir in filtered_png_img_gene_dirs
                          if gene in saving_dir.split('/')[-2]][0]

    # The directory paths are concatenated as they are with the file name
    base_name = '_'.join([experiment_name, hyb, gene, 'pos', pos])

    # Save the images and the counting if performed
    io.imsave(img_saving_dir_png + base_name + '.png', img_filtered)
    np.save(img_saving_dir_npy + base_name + '.npy', img_filtered_original,
            allow_pickle=False)

    if counting_dict:
        counting_saving_dir = [saving_dir for saving_dir in counting_gene_dirs
                               if gene in saving_dir.split('/')[-2]][0]
        # Context manager so the file is flushed and closed right after
        # the dump
        with open(counting_saving_dir + base_name + '.pkl', 'wb') as pkl_file:
            pickle.dump(counting_dict, pkl_file)
def filtering_and_counting_experiment(fpath_img_to_filter, filtered_dir_path,
                                      counting_dir_path, exp_name, add_slash,
                                      plane_keep=None, min_distance=5,
                                      stringency=0):
    """
    Function to filter and count dots in the images generated from a small
    experiment.

    The channel is the third to last '_'-separated field of the file name.
    Channels named 'Nuclei', 'Dapi', 'DAPI' or 'polyA', or containing '-IF',
    are filtered with the nuclei filtering and are not counted. Every other
    channel goes through the smFISH filtering and the dots counting.
    The filtered image is saved as a uint16 .png and the counting
    dictionary, when produced and non-empty, as a .pkl. Both are named
    <exp_name>_<input file name without its last 4 characters>.<ext>.

    Parameters:
    ------------

    fpath_img_to_filter: str
        path to the file to process.
    filtered_dir_path: str
        path of the directory where the filtered image is saved.
    counting_dir_path: str
        path of the directory where the counting of the filtered image
        is stored.
    exp_name: str
        name of the experiment to process.
    add_slash: str
        path separator. Used to isolate the file name from
        fpath_img_to_filter and to join the output directories with the
        output file names.
    plane_keep: list
        start and end point of the z-planes to keep. Default None
        keep all the planes (ex. [2,-3]).
    min_distance: int
        minimum distance between dots.
    stringency: int
        stringency use to select the threshold used for counting.

    """

    # Load the image to process and convert it to float
    img_stack = np.load(fpath_img_to_filter)
    img_stack = img_as_float(img_stack)

    # Remove extra planes (slice along the first axis only)
    if plane_keep:
        img_stack = img_stack[plane_keep[0]:plane_keep[1], :, :]

    input_fname = fpath_img_to_filter.split(add_slash)[-1]
    channel = input_fname.split('_')[-3]

    not_counting = ['Nuclei', 'Dapi', 'DAPI']

    # Filtering image according to the channel
    if channel in not_counting or '-IF' in channel or channel == 'polyA':
        # Staining that is not counted: use the nuclei filtering
        img_filtered = nuclei_filtering(img_stack)
        counting_dict = None
    else:
        # smFISH filtering followed by dots counting on the whole image
        img_filtered = smFISH_filtering(img_stack)
        counting_dict = thr_calculator(img_filtered, min_distance, stringency)

    # Convert image to uint16: clip the values above 1, scale to the max
    # of the uint16, then round and convert to integer
    img_filtered[img_filtered > 1] = 1
    img_filtered *= np.iinfo(np.uint16).max
    img_filtered = np.uint16(np.rint(img_filtered))

    # Drop the last 4 characters of the file name (a '.npy' like extension)
    fname = input_fname[:-4]
    output_prefix = add_slash + exp_name + '_' + fname

    io.imsave(filtered_dir_path + output_prefix + '.png', img_filtered)

    if counting_dict:
        # Context manager so the file is flushed and closed right after
        # the dump
        with open(counting_dir_path + output_prefix + '.pkl', 'wb') as pkl_file:
            pickle.dump(counting_dict, pkl_file)
def filtering_and_counting_ill_correction(fpath_img_to_filter, illumination_function,
                                          filtered_png_img_gene_dirs,
                                          filtered_img_gene_dirs, counting_gene_dirs,
                                          illumination_correction=False,
                                          plane_keep=None, min_distance=5,
                                          stringency=0, skip_genes_counting=None,
                                          skip_tags_counting=None):
    """
    Correct one image stack for the illumination, filter it and, unless the
    gene is skipped, count the smFISH dots in it.

    The file name (last '/'-separated component of the path) is split on
    '_': fields 0, 1 and 2 are used as experiment name, hybridization and
    gene, and field 4 (up to the first '.') as the position.
    The image stack is always divided by illumination_function before the
    filtering. The filtered image is saved as .png (clipped at 1 and
    rescaled to uint16) and as .npy (unscaled filter output). The counting
    dictionary, when produced and non-empty, is pickled to a .pkl file. All
    the outputs are named <experiment>_<hyb>_<gene>_pos_<pos>.<ext>.

    Parameters:
    ------------

    fpath_img_to_filter: str
        path to the .npy file to process.
    illumination_function: np.array float64
        illumination function. The image stack (after the removal of the
        extra planes) is divided by it.
    filtered_png_img_gene_dirs: list
        list of the paths of the directories where the filtered images
        are saved as pngs.
    filtered_img_gene_dirs: list
        list of the paths of the directories where the filtered images
        are saved as .npy.
    counting_gene_dirs: list
        list of the paths of the directories where the countings of the
        filtered images are saved.
    illumination_correction: bool
        not used by this function (the correction is always applied);
        kept so existing callers keep working.
    plane_keep: list
        start and end point of the z-planes to keep (ex. [2,-3]).
        Anything that is not a list (default None) keeps all the planes.
    min_distance: int
        minimum distance between dots.
    stringency: int
        stringency use to select the threshold used for counting.
    skip_genes_counting: list
        list of the genes to skip for counting. None means no gene
        is skipped.
    skip_tags_counting: list
        list of the tags inside the genes/stainings name to avoid to count.
        None means no tag is skipped.

    Raises:
    ------------

    IndexError
        if the file name has fewer than five '_'-separated fields or if no
        saving directory matches the gene.

    """

    # The documented default is None: treat it as "nothing to skip" instead
    # of failing on the membership test / iteration below.
    if skip_genes_counting is None:
        skip_genes_counting = []
    if skip_tags_counting is None:
        skip_tags_counting = []

    # Get infos from file name
    fname_split = fpath_img_to_filter.split('/')[-1].split('_')
    experiment_name = fname_split[0]
    hyb = fname_split[1]
    gene = fname_split[2]
    pos = fname_split[4].split('.')[0]

    # Load the image to process and convert it to float
    img_stack = np.load(fpath_img_to_filter)
    img_stack = img_as_float(img_stack)

    # Remove extra planes (slice along the first axis only)
    if isinstance(plane_keep, list):
        img_stack = img_stack[plane_keep[0]:plane_keep[1], :, :]

    # Correct for illumination
    img_stack = img_stack / illumination_function

    # Filtering image according to gene
    skip_counting = (gene in skip_genes_counting
                     or any(tag in gene for tag in skip_tags_counting))
    if skip_counting:
        # Staining that is not counted: use the nuclei filtering
        img_filtered = nuclei_filtering(img_stack)
        counting_dict = None
    else:
        # smFISH filtering followed by dots counting on the whole image
        img_filtered = smFISH_filtering(img_stack)
        counting_dict = thr_calculator(img_filtered, min_distance, stringency)

    # Keep the unscaled image: the conversion below works in place
    img_filtered_original = img_filtered.copy()

    # Convert image to uint16: clip the values above 1, scale to the max
    # of the uint16, then round and convert to integer
    img_filtered[img_filtered > 1] = 1
    img_filtered *= np.iinfo(np.uint16).max
    img_filtered = np.uint16(np.rint(img_filtered))

    # Identify the directories for storing the images. A directory matches
    # when the gene name is contained in its second to last '/'-separated
    # component; the first match is used.
    img_saving_dir_npy = [saving_dir for saving_dir in filtered_img_gene_dirs
                          if gene in saving_dir.split('/')[-2]][0]
    img_saving_dir_png = [saving_dir for saving_dir in filtered_png_img_gene_dirs
                          if gene in saving_dir.split('/')[-2]][0]

    # The directory paths are concatenated as they are with the file name
    base_name = '_'.join([experiment_name, hyb, gene, 'pos', pos])

    # Save the images and the counting if performed
    io.imsave(img_saving_dir_png + base_name + '.png', img_filtered)
    np.save(img_saving_dir_npy + base_name + '.npy', img_filtered_original,
            allow_pickle=False)

    if counting_dict:
        counting_saving_dir = [saving_dir for saving_dir in counting_gene_dirs
                               if gene in saving_dir.split('/')[-2]][0]
        # Context manager so the file is flushed and closed right after
        # the dump
        with open(counting_saving_dir + base_name + '.pkl', 'wb') as pkl_file:
            pickle.dump(counting_dict, pkl_file)
def counting_only(fpath_img_to_count, counting_gene_dirs, min_distance=5,
                  stringency=0):
    """
    Function used to count the smFISH dots in an image that is loaded as
    it is: no filtering is run here.

    The file name (last '/'-separated component of the path) is split on
    '_': fields 0, 1 and 2 are used as experiment name, hybridization and
    gene, and field 4 (up to the first '.') as the position. The counting
    dictionary is pickled to <experiment>_<hyb>_<gene>_pos_<pos>.pkl.

    Parameters:
    ------------

    fpath_img_to_count: str
        path to the .npy file to process.
    counting_gene_dirs: list
        list of the paths of the directories where the countings of the
        filtered images are saved.
    min_distance: int
        minimum distance between dots.
    stringency: int
        stringency use to select the threshold used for counting.

    Raises:
    ------------

    IndexError
        if the file name has fewer than five '_'-separated fields or if no
        directory in counting_gene_dirs matches the gene.

    """

    # Get infos from file name
    fname_split = fpath_img_to_count.split('/')[-1].split('_')
    experiment_name = fname_split[0]
    hyb = fname_split[1]
    gene = fname_split[2]
    pos = fname_split[4].split('.')[0]

    # Load the image to process and convert it to float
    img = np.load(fpath_img_to_count)
    img = img_as_float(img)

    # Count the dots in the whole image
    counting_dict = thr_calculator(img, min_distance, stringency)

    # A directory matches when the gene name is contained in its second to
    # last '/'-separated component; the first match is used.
    counting_saving_dir = [saving_dir for saving_dir in counting_gene_dirs
                           if gene in saving_dir.split('/')[-2]][0]

    # The directory path is concatenated as it is with the file name
    fname = (counting_saving_dir
             + '_'.join([experiment_name, hyb, gene, 'pos', pos]) + '.pkl')

    # Context manager so the file is flushed and closed right after the dump
    with open(fname, 'wb') as pkl_file:
        pickle.dump(counting_dict, pkl_file)