Source code for pysmFISH.dots_coords_calculations

import numpy as np
import pickle
import glob
import re

[docs]def register_dots_coords(reg_data,hybridization,gene,all_raw_counts):
    
    """
    Function to register the coords of the dots according to the new coords
    of the corner of the images calculated after registration of the stitched 
    images.
    
    Parameters:
    -----------

    reg_data: dict 
        Dictionary containing the coords and the joining
        data calculated after the registration.
    hybridization: str 
        Name of the hybridization processed (Ex. Hybridization2)
    gene: str 
        Processed gene name (Ex. Gfap)
    all_raw_counts: dict 
        Dictionary with all the raw counting from all the hybridizations
    
    Returns:
    ---------

    all_raw_counts: dict 
        Dictionary with all the raw counting from all the hybridizations. 
        Added a subgroup containing the aligned coords.
    """
    
    
    # tile_set = reg_data['micData'].tile_set.data
    # tile_set = tile_set.ravel()
    # for pos in all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene].keys():  
    #     # control if missing positions (useful if you image multiple ROI in same session)
    #     res = np.where(tile_set==np.int(pos))[0]
    #     if res:
    #         idx = res[0]
    #         corner_coords = reg_data['joining']['corner_list'][idx][1]
    #         old_coords = all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene][pos]
    #         if not np.all(old_coords==0):
    #             corrected_coords = old_coords + corner_coords
    #             all_raw_counts['selected_peaks_coords_aligned'][hybridization][gene][pos] = corrected_coords


    tile_set = reg_data['micData'].tile_set.data
    tiles = reg_data['micData'].tile_nr
    tile_set = tile_set.ravel()
    for pos in all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene].keys():  
        # control if missing positions (useful if you image multiple ROI in same session)
        tmp_idx = np.where(tiles==np.int(pos))[0]
        if tmp_idx:
            tmp_idx=tmp_idx[0]
            res = np.where(tile_set==tmp_idx)[0]
            if res:
                idx = res[0]
                corner_coords = reg_data['joining']['corner_list'][idx][1]
                old_coords = all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene][pos]
                if not np.all(old_coords==0):
                    corrected_coords = old_coords + corner_coords
                    all_raw_counts['selected_peaks_coords_aligned'][hybridization][gene][pos] = corrected_coords


    return all_raw_counts


[docs]def processing_combinations(hybridizations,aligned_peaks_dict):
    
    """
    Function used to create a list of tuples containing the pair hybridization
    and gene (Ex. (Hybridization1, Gfap)) that will be used to process the dots
    removal in parallel.
    
    Parameters:
    -----------

    hybridizations: list 
        List of the hybridization to process 
        (ex. [Hybridization1, Hybridization2]).
    aligned_peaks_dict: dict 
        Dictionary containing only the coords of the selected peaks after 
        registration (still with pos information).
                                
                                
    Returns:
    ---------
    combinations: list 
        List containing the pairs hybridization and gene 
        (Ex. (Hybridization1, Gfap))
    
    """  
    
    combinations_tmp=list()
    if isinstance(hybridizations,list):
        for hybridization in hybridizations:
            combinations_tmp.append([(hybridization,gene) for gene in aligned_peaks_dict[hybridization].keys()])
        combinations=[el for grp in combinations_tmp for el in grp]
    else:
        hybridization=hybridizations
        combinations=[(hybridization,gene) for gene in aligned_peaks_dict[hybridization].keys()]
    return combinations


[docs]def overlapping_images_grid(micData):
    """
    Function used to generate all possible overlapping images (neighborhood).
    
    Parameters:
    -----------

    micData: dict 
        Dictionary extracted from the reg_data and containing the infomation 
        regarding tiles organization
    
    Returns:
    ---------

    all_nhood: list 
        List with all the overlapping tiles combinations
    
    """
    
    tile_set=micData.tile_set.data
    Img_r,Img_c=tile_set.shape
    all_nhood=list()

    for r in np.arange(Img_r):
        for c in np.arange(Img_c):
            # Skip the position with the center tile ==-1 in the ImgSet
            if tile_set[r,c]!=-1:
                # I know the order of the neighboouring objects
                coordinates=[(r+i, c+j) for j in np.arange(-1,2) for i in np.arange(-1,2)
                             if ((0<=r+i<=Img_r-1) and (0<=c+j<=Img_c-1))]
                # Remove the pairs where -1 is in the ImgSet
                coordinates=[i for i in coordinates if tile_set[i]!=-1 ]
                if coordinates:
                    all_nhood.append(tuple(coordinates)) 
    return tuple(all_nhood)


[docs]def hood_creation(coords,pxl):
    
    """
    Function that creates the neighborood of coords that are considered 
    same RNA molecule. It is the region used to eliminate overlapping dots.
    
    Parameters:
    -----------

    coords: np.array 
        Array of the [r,c] of the dots.
    pxl: int
        Radius of pixel used to create the nhood.
    
    Return:
    ---------

    hood: set 
        Set with the coords of the neighborhood of the selected dot.
    
    """
    
    rg=np.arange(-(pxl),pxl+1)
    hood=tuple([(coords[0]+i,coords[1]+j) for i in rg for j in rg])
    return set(hood)


[docs]def dots_removing(overlapping_list,pxl):
    """
    Function used to identify matching dots that are present in overlapping 
    imaging areas.
    
    Parameters:
    -----------

    overlapping_list: list 
        List with the coords of the dots located in the overlapping region.                    
    pxl: int 
        Radius of pixel used to create the nhood.


    Returns:
    ---------

    removed_dots: set 
        Set with the coords of the removed dots.
    single_dots: set 
        Set with the coords of dots in the overlpping region that do not 
        overlap with any other dot.
    
    """
    
    removed_dots_coords=list()
    single_dots=set(overlapping_list.copy())
    remove_dot_step={}
    for coords in overlapping_list:
        if not {coords}.intersection(remove_dot_step):
            hood=hood_creation(coords,pxl)
            # Remove pxl to keep from hood
            hood=hood.difference({coords})
            single_dots=single_dots.difference(hood)
            remove_dot_step=set(overlapping_list).difference(set(single_dots))
    removed_dots=set(overlapping_list).difference(set(single_dots))
    return removed_dots,single_dots


[docs]def catch_duplicated_dots(combination, image_list,
                          overlapping_percentage,image_size,pxl):
    
    # dots_removing
    
    """
    Function used to identify dots that need to be removed or kept in the
    overlapping region between two images. The overlapping used is calculated
    according to the stitching information.
    
    Parameters:
    -----------

    combination: dict 
        Dictionary containing hybridization, gene, registation data of the 
        current hybridization and the registerd coords of the selected peaks 
        for the current gene.
    image_list: tuple 
        Tuple with the index of the the images with ooverlap
    overlapping_percentage: float 
        Percentage of overlap extracted from the Experimental_metadata.yaml.
    image_size: int 
        Size of the image (consider a square image).     
    pxl: int 
        Radius of pixel used to create the nhood.
    
    Returns:
    -----------

    overlapping_result_dict: dict 
        Dictionary with the coords of the dots that need to be removed or will 
        be kept for the selected pair of images.
    
    """
     
    # Extract the data
    micData = combination['reg_data_combination']['micData']
    joining = combination['reg_data_combination']['joining']
    aligned_peaks_dict_gene = combination['aligned_peaks_dict_gene']
    gene = combination['gene']
    hybridization = combination['hybridization']
    
    
    # Determine the image set from the stitched data
    img_set = micData.tile_set.data
    img_set_rav=img_set.ravel()

    # Create the overlapping results dict
    overlapping_result_dict=dict()
    overlapping_result_dict['row_dir']={}
    overlapping_result_dict['col_dir']={}

    overlapping_pixels=overlapping_percentage*image_size
    
    # Sort the list by row
    image_list=list(image_list)
    image_list.sort(key=lambda tup: tup[0]) 

    # work on rows
    rows=[el[0] for el in image_list]
    rows=set(rows)

    cols=[el[1] for el in image_list]
    cols=set(cols)

    overlapping=list()
    non_overlapping=list()
    counter=0

    # Process the images in rows
    for r in rows:
        row_grp=[ el for el in image_list if el[0] == r ]

        # Calculate the overlapping
        for p in np.arange(len(row_grp)-1):
            overlapping=list()
            non_overlapping=list()

            r,c=row_grp[p]
            pos_l=img_set[r,c]
            idx=np.where(img_set_rav==np.int(pos_l))[0][0]
            corner_left=joining['corner_list'][idx]
            corner_coords_left=corner_left[1]

            r,c=row_grp[p+1]
            pos_r=img_set[r,c]
            idx=np.where(img_set_rav==np.int(pos_r))[0][0]
            corner_right=joining['corner_list'][idx]
            corner_coords_right=corner_right[1]

            positions=[pos_l,pos_r]

            # Calculation expected coords in cols
            expected_corner_c=corner_coords_left[1]+image_size-overlapping_pixels
            c_error=np.abs(corner_coords_right[1]-expected_corner_c)
            r_error=np.abs(corner_coords_left[0]-corner_coords_right[0])

            # Remember I know the order of image acquisition
            # Determine the overlapping bounding-box
            r_TL=np.maximum(corner_coords_left[0],corner_coords_right[0])
            c_TL=corner_coords_right[1]
            TL=(r_TL,c_TL)

            r_BR=np.minimum(corner_coords_left[0]+image_size,corner_coords_right[0]+image_size)
            c_BR=corner_coords_left[1]+image_size
            BR=(r_BR,c_BR)


            # identify the dots in the overlapping region
            for pos in positions:
                if pos in aligned_peaks_dict_gene.keys():
                    dots=aligned_peaks_dict_gene[pos]

                    for dot in dots:
                            if (dot>=TL).all() and (dot<=BR).all():
                                overlapping.append(dot)
                            else:
                                non_overlapping.append(dot)

            # Fix the counting by removing the dots
            overlapping_list=[tuple(el) for el in overlapping]
            removed_dots,single_dots=dots_removing(overlapping_list,pxl)
            overlapping_result_dict['row_dir'][counter]={'tl':TL,'br':BR,'removed_dots':removed_dots,
                                                          'single_dots':single_dots}

            counter+=1


    counter=0 

    # Process the images in columns
    for c in cols:
        col_grp=[ el for el in image_list if el[1] == c ]

        # Calculate the overlapping
        for p in np.arange(len(col_grp)-1):
            overlapping=list()
            non_overlapping=list()

            r,c=col_grp[p]
            pos_t=img_set[r,c]

            idx=np.where(img_set_rav==np.int(pos_t))[0][0]
            corner_top=joining['corner_list'][idx]
            corner_coords_top=corner_top[1]
            r,c=col_grp[p+1]
            pos_b=img_set[r,c]

            idx=np.where(img_set_rav==np.int(pos_b))[0][0]
            corner_bottom=joining['corner_list'][idx]
            corner_coords_bottom=corner_bottom[1]

            positions=[pos_t,pos_b]

            # Calculation expecated coords in cols
            expected_corner_r=corner_coords_top[0]+image_size-overlapping_pixels
            r_error=np.abs(corner_coords_bottom[0]-expected_corner_r)
            c_error=np.abs(corner_coords_top[1]-corner_coords_bottom[1])

            # Remember I know the order of image acquisition
            # Determine the overlapping bounding-box
            r_TL=corner_coords_bottom[0]
            c_TL=np.maximum(corner_coords_bottom[1],corner_coords_top[1])
            TL=(r_TL,c_TL)


            r_BR=corner_coords_top[0]+image_size
            c_BR=np.minimum(corner_coords_bottom[1]+image_size,corner_coords_top[1]+image_size)
            BR=(r_BR,c_BR)


            # identify the dots in the overlapping region
            for pos in positions:
                if pos in aligned_peaks_dict_gene.keys():
                    dots=aligned_peaks_dict_gene[pos]

                    for dot in dots:
                            if (dot>=TL).all() and (dot<=BR).all():
                                overlapping.append(dot)
                            else:
                                non_overlapping.append(dot)

                # Fix the counting by removing the dots
                overlapping_list=[tuple(el) for el in overlapping]
                removed_dots,single_dots=dots_removing(overlapping_list,pxl)
                overlapping_result_dict['col_dir'][counter]={'tl':TL,'br':BR,'removed_dots':removed_dots,
                                                              'single_dots':single_dots}

                counter+=1

    
    return overlapping_result_dict


[docs]def all_dots_removal(combination,all_removed):

    """
    Function to remove the overlapping dots.
    
    Parameters:
    -----------

    combination: dict  
        Dictionary containing hybridization, gene, registation data of the 
        current hybridization and the registerd coords of the selected peaks 
        for the current gene.
    all_removed: dict 
        Dictionary with the coords of the dots that need to be removed or will 
        be kept for all the images

    Returns:
    -----------

    all_coords_cleaned: set 
        Set containing the coords of the dots after removal of the overlapping 
        ones. No image pos info is kept.
    removed_coords: set 
        Set containing the coords of the dots removed. No image pos info is kept.
    all_coords: set 
        Set containing the coords of all the aligned dots before removal
        of the overlapping ones. No image pos info is kept.
    
    
    """
    aligned_peaks_dict_gene = combination['aligned_peaks_dict_gene']
    gene = combination['gene']
    hybridization = combination['hybridization']
    
    # Create the set of the coords that will be removed
    directions_list=['row_dir','col_dir']
    removed_coords=set()
    for direction in directions_list:
        for idx in all_removed[hybridization][gene].keys():
                for pos in all_removed[hybridization][gene][idx][direction].keys():
                    if all_removed[hybridization][gene][idx][direction][pos]['removed_dots']:
                        removed_coords=removed_coords.union(all_removed[hybridization][gene][idx][direction][pos]['removed_dots'])

    # Create a set with the coords of all the dots
    all_coords=list()
    for pos in aligned_peaks_dict_gene.keys():
        all_coords.append([tuple(coords) for coords in aligned_peaks_dict_gene[pos]])

    all_coords=[el for coords in all_coords for el in coords]
    all_coords=set(all_coords)


    # Remove the coords from the dataset
    all_coords_cleaned=all_coords.difference(removed_coords)
    
    return all_coords_cleaned,removed_coords,all_coords

[docs]def function_to_run_dots_removal_parallel(combination,overlapping_percentage,image_size,pxl):
    
    """
    Function to run the overlapping dots removal in parallel
    
    Parameters:
    -----------

    combination: dict  
        Dictionary containing hybridization, gene, registation data of the 
        current hybridization and the registerd coords of the selected peaks 
        for the current gene.
    overlapping_percentage: float 
        Percentage of overlap extracted from the Experimental_metadata.yaml
    image_size: int 
        Size of the image (consider a square image)
    plx: int 
        Radius of pixel used to create the nhood.
    
    
    Returns:
    ---------

    all_coords_cleaned_dict: dict 
        Dictionary with the dots coords of the processed gene before and
        after removal plus the coords of the removed dots. 
    
    """
    # Determine hyb and gene processed
    gene = combination['gene']
    hybridization = combination['hybridization']
    
    micData = combination['reg_data_combination']['micData']
    
    # create the dictionaries
    all_removed=dict()
    all_coords_cleaned_dict=dict()
    all_removed[hybridization]={}
    all_coords_cleaned_dict[hybridization]={}

    all_removed[hybridization][gene]={}
    
    # Calculate all the neighborhood where to remove the dots
    all_nhood = overlapping_images_grid(micData)
    
    
    for idn,image_list in enumerate(all_nhood):
        overlapping_result_dict=catch_duplicated_dots(combination, image_list,
                          overlapping_percentage,image_size,pxl)
        all_removed[hybridization][gene][idn]=overlapping_result_dict
    
    all_coords_cleaned,removed_coords,all_coords = all_dots_removal(combination,all_removed)
    all_coords_cleaned_dict[hybridization][gene]={'all_coords_cleaned':all_coords_cleaned,
                                        'removed_coords':removed_coords,
                                        'all_coords':all_coords}
    
    return all_coords_cleaned_dict

def combine_raw_counting_results(flt_rawcnt_config,hybridizations_infos,
                                 experiment_infos,processing_experiment_directory,
                                stitched_reference_files_dir,reference_gene,add_slash):

    # Collect all the counts in a dictionary maintaining the pos references

    # Prepare the dictionary where to store the raw counts
    all_raw_counts = dict()
    all_raw_counts['selected_peaks_coords_aligned'] = dict()
    all_raw_counts['selected_peaks_coords_not_aligned'] = dict()
    all_raw_counts['remaining_data'] = dict()
    all_raw_counts['registration_data'] = dict()
    skip_genes_counting=flt_rawcnt_config['skip_genes_counting']
    skip_tags_counting=flt_rawcnt_config['skip_tags_counting']

    for hybridization in hybridizations_infos.keys():

        all_raw_counts['selected_peaks_coords_aligned'][hybridization] = dict()
        all_raw_counts['selected_peaks_coords_not_aligned'][hybridization] = dict()
        all_raw_counts['remaining_data'][hybridization]= dict()

        hyb_short = re.sub('Hybridization','hyb',hybridization)
        processing_hyb = experiment_infos['ExperimentName']+'_'+hyb_short
        hyb_dir = processing_experiment_directory+processing_hyb+add_slash

        # Determine the genes to stitch in the processing hybridization
        genes_processing = hybridizations_infos[hybridization].keys()

        for gene in genes_processing:
            if gene not in skip_genes_counting:
                if [tag for tag in skip_tags_counting if tag not in gene]:

                    # Determine the counting directories
                    counting_gene_dirs_path = hyb_dir+flt_rawcnt_config['analysis_name']+'_'+processing_hyb +'_counting'+add_slash
                    counting_gene_dirs = glob.glob(counting_gene_dirs_path+'*')

                    # Load the _reg_data.pkl files with the information about the registration
                    reg_fname = stitched_reference_files_dir + flt_rawcnt_config['analysis_name'] +'_'+ \
                                                processing_hyb+'_'+reference_gene+'_stitching_data_reg.pkl'
                    reg_data = pickle.load(open(reg_fname,'rb'))

                    all_raw_counts['registration_data'][hybridization]= reg_data

                    all_raw_counts['selected_peaks_coords_aligned'][hybridization][gene] = dict()
                    all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene] = dict()
                    all_raw_counts['remaining_data'][hybridization][gene]= dict()


                    # Determine the counting directory to process
                    counting_files_dir = [pkl_dir for pkl_dir in counting_gene_dirs if gene in pkl_dir][0]
                    counting_files_dir = counting_files_dir +add_slash

                    # Get list of the files with counts
                    counting_files = glob.glob(counting_files_dir+'*.pkl')


                    # Load the raw counting data and combine them into a dictionary
                    for counting_file in counting_files:
                        pos = int(counting_file.split('_')[-1].split('.')[0])
                        data = pickle.load(open(counting_file,'rb'))

                        all_raw_counts['selected_peaks_coords_not_aligned'][hybridization][gene][pos] = data['selected_peaks']
                        all_raw_counts['remaining_data'][hybridization][gene][pos] = dict()
                        for key in data.keys():
                            if key != 'selected_peaks':
                                all_raw_counts['remaining_data'][hybridization][gene][pos][key] = data[key]

                    # Recalculate the coords but keep position reference
                    all_raw_counts = register_dots_coords(reg_data,hybridization,gene,all_raw_counts)


    # Save all the data
    counting_data_name = processing_experiment_directory +experiment_infos['ExperimentName']+'_all_raw_counting_data.pkl'
    pickle.dump(all_raw_counts,open(counting_data_name,'wb'))
    
    return all_raw_counts