Source code for dots_coords_correction

#!/usr/bin/env python

import argparse
from distributed import Client, LocalCluster
import multiprocessing
import pickle

from pysmFISH import dots_coords_calculations
from pysmFISH import utils



def dots_coords_correction():
    """
    This script is used to collect all the raw countings from the different
    hybridizations, correct the coords according to the registration of the
    reference gene and remove the dots that overlap in the overlapping
    regions between the images. It saves the aggregate coords and also the
    coords after dots processing.

    Input via argparse

    Parameters:
    -----------
    path: string
        Exact path to the experiment folder
    pixel_radius: int
        Radius of pixels used to create the neighbourhood (nhood) used to
        define when two dots are the same
    scheduler: string
        Dask scheduler address (optional; a local cluster is started if omitted)
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Dots coords consolidation '
                                     'and correction')
    parser.add_argument('-path', help='path to the experiment folder')
    parser.add_argument('-pixel_radius', type=int,
                        help='radius of pixels used to create the nhood '
                        'that is used to define that two pixels are the same')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')

    args = parser.parse_args()

    # Retrieve the parameters
    processing_experiment_directory = args.path
    pxl = args.pixel_radius

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on a server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores minus one.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check the trailing slash in the experiment directory
    processing_experiment_directory = utils.check_trailing_slash(
        processing_experiment_directory, os_windows)

    stitched_reference_files_dir = processing_experiment_directory + 'stitched_reference_files'

    # Check the trailing slash in the stitched reference directory
    stitched_reference_files_dir = utils.check_trailing_slash(
        stitched_reference_files_dir, os_windows)

    # Collect the infos of the experiment and the processing
    # Parse the experimental metadata file (serial)
    experiment_infos, image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters = \
        utils.experimental_metadata_parser(processing_experiment_directory)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(
        processing_experiment_directory)

    # Get the reference gene
    reference_gene = flt_rawcnt_config['reference_gene']

    # Get the overlapping percentage and image size
    overlapping_percentage = image_properties['Overlapping_percentage']

    # Consider a square image
    image_size = image_properties['HybImageSize']['columns']

    # Combine all counts
    all_raw_counts = dots_coords_calculations.combine_raw_counting_results(
        flt_rawcnt_config, hybridizations_infos, experiment_infos,
        processing_experiment_directory, stitched_reference_files_dir,
        reference_gene, add_slash)

    # Create a dictionary with only the selected peaks coords after alignment
    aligned_peaks_dict = all_raw_counts['selected_peaks_coords_aligned']

    # Create a list of tuples to process each hybridization/gene on a
    # different core
    combinations = dots_coords_calculations.processing_combinations(
        list(hybridizations_infos.keys()), aligned_peaks_dict)

    # Add the corresponding registration_data and coords files to each
    # combination in order to reduce the size of the info transferred over
    # the network
    added_combinations = list()
    for combination in combinations:
        hybridization = combination[0]
        gene = combination[1]
        reg_data_combination = all_raw_counts['registration_data'][hybridization]
        aligned_peaks_dict_gene = all_raw_counts['selected_peaks_coords_aligned'][hybridization][gene]
        combination_dict = {
            'hybridization': hybridization,
            'gene': gene,
            'reg_data_combination': reg_data_combination,
            'aligned_peaks_dict_gene': aligned_peaks_dict_gene
        }
        added_combinations.append(combination_dict)

    # Process each gene in parallel
    futures_processes = client.map(
        dots_coords_calculations.function_to_run_dots_removal_parallel,
        added_combinations,
        overlapping_percentage=overlapping_percentage,
        image_size=image_size, pxl=pxl)

    cleaned_dots_list = client.gather(futures_processes)

    # Convert the list of dictionaries into one single dictionary.
    # The saved dictionary contains all the dots; the reference to the tile
    # position has been removed during the overlapping dots removal step.
    all_countings = dict()
    all_countings['all_coords_cleaned'] = dict()
    all_countings['all_coords'] = dict()
    all_countings['removed_coords'] = dict()

    for el in cleaned_dots_list:
        hybridization = list(el.keys())[0]
        gene = list(el[hybridization].keys())[0]
        renamed_gene = gene + '_' + hybridization
        all_countings['all_coords_cleaned'][renamed_gene] = \
            el[hybridization][gene]['all_coords_cleaned']
        all_countings['all_coords'][renamed_gene] = \
            el[hybridization][gene]['all_coords']
        all_countings['removed_coords'][renamed_gene] = \
            el[hybridization][gene]['removed_coords']

    # Save all the data
    counting_data_name = processing_experiment_directory + \
        experiment_infos['ExperimentName'] + '_all_cleaned_raw_counting_data.pkl'
    with open(counting_data_name, 'wb') as pickle_file:
        pickle.dump(all_countings, pickle_file)

    client.close()
if __name__ == "__main__":
    dots_coords_correction()
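
For reference, a minimal sketch of how this script could be invoked and its output consumed. The paths, scheduler address, and experiment name below are illustrative assumptions, not values shipped with pysmFISH; only the output file suffix and the top-level dictionary keys come from the code above.

# Illustrative invocations (hypothetical paths/addresses):
#   python dots_coords_correction.py -path /data/EXP-001/ -pixel_radius 3
#   python dots_coords_correction.py -path /data/EXP-001/ -pixel_radius 3 \
#       -scheduler tcp://192.168.0.4:7003

import pickle

# The script writes <ExperimentName>_all_cleaned_raw_counting_data.pkl into
# the experiment folder; the file name below is a hypothetical example.
with open('/data/EXP-001/EXP-001_all_cleaned_raw_counting_data.pkl', 'rb') as fh:
    all_countings = pickle.load(fh)

# Top-level keys written by the script: 'all_coords_cleaned', 'all_coords'
# and 'removed_coords'; each maps '<gene>_<hybridization>' to the
# corresponding coords.
print(sorted(all_countings.keys()))
print(list(all_countings['all_coords_cleaned'].keys())[:5])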