Source code for pysmFISH.stitching_package.hdf5preparation

import h5py
import math
import numpy as np
import logging

from . import tilejoining
from . import inout

""" Functions to create necessary structures and fill them in the hfd5
file.

Functions:
create_structures_hdf5_files
fill_hdf5_files
divide_final_image
fill_hdf5_MPI

"""
logger = logging.getLogger(__name__)


########################### HDF5 file creation ##########################
[docs]def create_structures_hdf5_files(stitching_file, joining, nr_pixels, z_size, hyb_nr, gene, ubyte = True, blend = 'non linear', reg_name = ''): """Takes an HDF5 file handle and creates the necessary structures. Creates groups and data sets, when the groups or data sets already exists, they are kept as they are, as long as the data sets have the right size and data type. Incompatible data sets will be overwritten. Stitching file has the following structure: __________________Groups____________|___Data sets____ gene1: StitchedImage: final_image final_image_ubyte blending_mask temp_mask StitchingTempTiles: blended_tiles: 0 1 . . n ubytes: 0 1 . . n temp_masks: 0 1 . . n gene2: ... Parameters: ----------- stitching_file: pointer HDF5 file handle. The file where the stitched images will be saved. joining: dict Dictionary containing keys 'corner_list and 'final_image_shape'. Corner_list is a list of list, each list is a pair of an image number (int) and it's coordinates (numpy array containing floats). Final_image_shape is a tuple of size 2 or 3 depending on the number of dimensions and contains ints. nr_pixels: int Height and length of the tile in pixels, tile is assumed to be square. z_size: int The number of layers in one tile (size of the z-axis). Should be 1 or None if the tile is 2D. hyb_nr: int The number of the hybridization we are stitching. This will be used to place the data in the right group in stitching_file. gene: str The name of the gene we are stitching.This will be used to place the data in the right group in stitching_file. ubyte: bool We need to create a data set for the ubyte image when True. Otherwise we only need to create a data set for the high resolution image. blend: str When 'non linear' or 'linear', blending will be applied, so we will need to create the structures necessary for saving the blended tiles. When it has another value or is None no blending at all will be applied, so we can skip this. This variable also determines to return value of linear_blending. reg_name: str A name that will be used to create a second StitchedImage group for an image that is registered to be aligned with the other hybridizations. warp_name will be attached to the end of the group name, which is StitchedImage. Returns: -------- stitched_group: pointer HDF5 reference to the group where the final will be. temp_group: pointer HDF5 reference to the group where the blended tiles will be saved. linear_blending: bool When True later blending should be linear and when False, blending should be non-linear. ubyte: bool The ubyte image should be saved when True. Otherwise only the high resolution image should be saved. blend: str When 'non linear' or 'linear', blending should be applied. When it has another value or is None no blending at all will be applied. """ logger.info("Generating stitching file structures.") # Create a group for the stitched images in the stitching file #stitching_file.require_group('Hybridization' + str(hyb_nr)) stitching_file.require_group(gene) stitched_group = stitching_file[gene].require_group('StitchedImage' + reg_name) # Create the final image in this file try: final_image = stitched_group.require_dataset('final_image', joining['final_image_shape'], dtype = np.float64) except TypeError as err: logger.info("Incompatible 'final_image' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['final_image'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('final_image', joining['final_image_shape'], dtype = np.float64) # Create a low resolution image if ubyte: try: final_image_ubyte = stitched_group.require_dataset('final_image_ubyte', joining['final_image_shape'], dtype=np.uint8) except TypeError as err: logger.info("Incompatible 'final_image_ubyte' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['final_image_ubyte'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('final_image_ubyte', joining['final_image_shape'], dtype=np.uint8) # If blending is required initialize the blending mask in the # hdf5 file if blend is not None: # For the blending masks use only the last 2 dimensions of final # image shape, because also when working in 3D the masks can be # 2D as there is the same shift in x and y direction for the # whole stack. try: blending_mask = stitched_group.require_dataset('blending_mask', joining['final_image_shape'][-2:], dtype = np.float64) except TypeError as err: logger.info("Incompatible 'blending_mask' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['blending_mask'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('blending_mask', joining['final_image_shape'][-2:], dtype = np.float64) try: temp_mask = stitched_group.require_dataset('temp_mask', joining['final_image_shape'][-2:], dtype = np.float64) except TypeError as err: logger.info("Incompatible 'temp_mask' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['temp_mask'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('temp_mask', joining['final_image_shape'][-2:], dtype = np.float64) # Check type of blending if blend == 'non linear': linear_blending = False elif blend == 'linear': linear_blending = True else: linear_blending = False logger.warning("Blend not defined correctly, \ using non-linear blending, \ blend is: {}".format(blend)) # Generate the tmp_file structure temp_group = stitching_file[gene].require_group('StitchingTempTiles' + reg_name) blended_tiles = temp_group.require_group('blended_tiles') ubytes = temp_group.require_group('ubytes') temporary_masks = temp_group.require_group('temp_masks') for idx in range(len(joining['corner_list'])): if z_size == 1 or z_size is None: try: blended_tiles.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64) except TypeError as err: logger.debug("Incompatible data set for blended tile {}, deleting old dataset.\n {}" .format(idx, err)) del blended_tiles[str(idx)] inout.free_hdf5_space(stitching_file) blended_tiles.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64) try: ubytes.require_dataset(str(idx), (nr_pixels, nr_pixels), np.uint8) except TypeError as err: logger.debug("Incompatible data set for ubyte tile {}, deleting old dataset.\n {}" .format(idx, err)) del ubytes[str(idx)] inout.free_hdf5_space(stitching_file) ubytes.require_dataset(str(idx), (nr_pixels, nr_pixels), np.uint8) else: try: blended_tiles.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.float64) except TypeError as err: logger.debug("Incompatible data set for blended tile {}, deleting old dataset.\n {}" .format(idx, err)) del blended_tiles[str(idx)] inout.free_hdf5_space(stitching_file) blended_tiles.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.float64) try: ubytes.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.uint8) except TypeError as err: logger.debug("Incompatible data set for ubyte tile {}, deleting old dataset.\n {}" .format(idx, err)) del ubytes[str(idx)] inout.free_hdf5_space(stitching_file) ubytes.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.uint8) try: temporary_masks.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64) except TypeError as err: logger.debug("Incompatible data set for temporary masks tile {}, deleting old dataset.\n {}" .format(idx, err)) del temporary_masks[str(idx)] inout.free_hdf5_space(stitching_file) temporary_masks.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64) if False: logger.info("Flushing hdf5 file to clean up after delete operations") before_flush = stitching_file.id.get_filesize() stitching_file.flush() after_flush = stitching_file.id.get_filesize() logger.debug("Size in bytes before flush: {} after flush: {} space freed: {}".format(before_flush, after_flush, before_flush - after_flush)) return stitched_group, temp_group, linear_blending, ubyte, blend
[docs]def fill_hdf5_files(im_file, joining, nr_pixels, ubyte, blend): """ Fill the data sets in the hdf5 file with zeros or random data. Parameters: ----------- im_file: pointer HDF5 reference. Reference to the StitchedImage group or any group or file that contains final_image and optionally final_image_ubyte, blending_mask and temp_mask. joining: dictionary Containing the key final_image_shape. Final_image_shape is a tuple of size 2 or 3 depending on the number of dimensions and contains ints. nr_pixels: int Height and length of the tile in pixels, tile is assumed to be square. ubyte: bool The ubyte image will be filled when True. Otherwise only the high resolution image will be saved. blend: str When blend is not None, blending_mask and temp_mask will be filled. """ logger.info("Filling stitching file data sets.") # Fill the image arrays with zeros, this works for 3D im_file['final_image'][:]= np.zeros(joining['final_image_shape']) if ubyte: im_file['final_image_ubyte'][:] = np.empty(joining['final_image_shape'], dtype=np.uint8) if blend is not None: im_file['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:]) tilejoining.make_mask(joining, nr_pixels, im_file['blending_mask']) im_file['temp_mask'][:] = np.zeros(joining['final_image_shape'][-2:])
[docs]def divide_final_image(im_shape, nr_blocks): """ Divide the image in rows if it is too large to be filled at once. The parallel HDF5 implementation cannot handle images larger than 2GB. Therefore this function can divide the rows of the image over nr_blocks to be filled by separate cores. Parameters: ----------- im_shape: tuple Shape (y, x) of the image to be filled. nr_blocks: int The number of block to divide the image in, typically this will be the number of available cores. Returns: -------- y_corners: list List of numpy ints (numpy.int64). The y-corners of all the blocks. """ logger.debug("nr_blocks {}".format(nr_blocks)) logger.debug("im_shape {}".format(im_shape[-2])) y_corners = np.linspace(0, im_shape[-2], num = nr_blocks, endpoint = False, dtype = int) logger.debug("y_corners {}".format(y_corners)) logger.debug('Types: y_corners: {} corner element: {}' .format(type(y_corners), type(y_corners[0]))) return y_corners
[docs]def fill_hdf5_MPI(y_corners, ind, im_shape, im_file, ubyte, blend): """Fill the data sets in the hdf5 file with zeros, block by block. The parallel HDF5 implementation cannot handle images larger than 2GB. Therefore this function only fills the part of the image from the indexing value found in y_corners[ind] up to y_corners[ind + 1]. Parameters: ----------- y_corners: list List of numpy ints (numpy.int64). The y-corners of all the blocks. ind : int The index of the corner in y_corners im_shape: tuple Shape of the final image. im_file: pointer HDF5 file handle or group reference. The file or group with the datasets that need to be filled. ubyte: bool Ubyte image is filled if True blend: str If this is None datasets needed for blending are left empty, for all other values they are filled. """ logger.info("Filling stitching file data sets in MPI mode.") # Calculate the range to be filled with zeros y_min = y_corners[ind] if (ind + 1) < len(y_corners): y_max = y_corners[ind + 1] else: y_max = im_shape[-2] logger.debug("index: {} y_min: {} y_max: {} im_shape[0]: {}" .format(ind, y_min, y_max, im_shape[-2])) # Fill with zeros: if len(im_shape) == 3: im_file['final_image'][:, y_min:y_max,:] = 0.0 if ubyte: im_file['final_image_ubyte'][:, y_min:y_max,:] = 0 else: im_file['final_image'][y_min:y_max,:] = 0.0 if ubyte: im_file['final_image_ubyte'][y_min:y_max,:] = 0 if blend is not None: im_file['blending_mask'][y_min:y_max,:] = 0.0 im_file['temp_mask'][y_min:y_max,:] = 0.0
#DO NOT FORGET to run tilejoining.make_mask after filling the arrays!!!! ########################### HDF5 file creation ##########################
[docs]def create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels, reference_gene, blend = 'non linear'): """Takes an HDF5 file handle and creates the necessary structures. Modification of create_structures_hdf5_files to work with .npy list of files Creates groups and data sets, when the groups or data sets already exists, they are kept as they are, as long as the data sets have the right size and data type. Incompatible data sets will be overwritten. Stitching file has the following structure: __________________Groups____________|___Data sets____ gene_stitched: StitchedImage: final_image blending_mask Parameters: ----------- stitching_file: pointer HDF5 file handle. The file where the stitched images will be saved. joining: dict Dictionary containing keys 'corner_list and 'final_image_shape'. Corner_list is a list of list, each list is a pair of an image number (int) and it's coordinates (numpy array containing floats). Final_image_shape is a tuple of size 2 or 3 depending on the number of dimensions and contains ints. nr_pixels: int Height and length of the tile in pixels, tile is assumed to be square. reference_gene: str The name of the gene we are stitching.This will be used to place the data in the right group in stitching_file. blend: str When 'non linear' or 'linear',blending will be applied, so we will need to create the structures necessary for saving the blended tiles. When it has another value or is None no blending at all will be applied, so we can skip this. This variable also determines to return value of linear_blending. Returns: -------- stitched_group: pointer HDF5 reference to the group where the final will be. linear_blending: bool When True later blending should be linear and when False, blending should be non-linear. blend: str When 'non linear' or 'linear', blending should be applied. When it has another value or is None no blending at all will be applied. """ logger.info("Generating stitching file structures.") # Create a group for the stitched images in the stitching file stitching_file.require_group(reference_gene) stitched_group = stitching_file[reference_gene].require_group('StitchedImage') # Create the final image in this file try: final_image = stitched_group.require_dataset('final_image', joining['final_image_shape'], dtype = np.float64) except TypeError as err: logger.info("Incompatible 'final_image' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['final_image'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('final_image', joining['final_image_shape'], dtype = np.float64) # If blending is required initialize the blending mask in the # hdf5 file if blend is not None: # For the blending masks use only the last 2 dimensions of final # image shape, because also when working in 3D the masks can be # 2D as there is the same shift in x and y direction for the # whole stack. try: blending_mask = stitched_group.require_dataset('blending_mask', joining['final_image_shape'][-2:], dtype = np.float64) except TypeError as err: logger.info("Incompatible 'blending_mask' data set already existed, deleting old dataset.\n {}" .format(err)) del stitched_group['blending_mask'] inout.free_hdf5_space(stitching_file) final_image = stitched_group.require_dataset('blending_mask', joining['final_image_shape'][-2:], dtype = np.float64) # Check type of blending if blend == 'non linear': linear_blending = False elif blend == 'linear': linear_blending = True else: linear_blending = False logger.warning("Blend not defined correctly, \ using non-linear blending, \ blend is: {}".format(blend)) if False: logger.info("Flushing hdf5 file to clean up after delete operations") before_flush = stitching_file.id.get_filesize() stitching_file.flush() after_flush = stitching_file.id.get_filesize() logger.debug("Size in bytes before flush: {} after flush: {} space freed: {}".format(before_flush, after_flush, before_flush - after_flush)) return stitched_group, linear_blending, blend