Source code for pysmFISH.stitching_package.hdf5preparation

import h5py
import math
import numpy as np
import logging

from . import tilejoining
from . import inout

""" Functions to create necessary structures and fill them in the hfd5
file.

Functions:
create_structures_hdf5_files
fill_hdf5_files
divide_final_image
fill_hdf5_MPI

"""
logger = logging.getLogger(__name__)


########################### HDF5 file creation ##########################
[docs]def create_structures_hdf5_files(stitching_file, joining, nr_pixels,
                                 z_size, hyb_nr, gene,
                                 ubyte = True, blend = 'non linear',
                                 reg_name = ''):
    """Takes an HDF5 file handle and creates the necessary structures.

    Creates groups and data sets, when the groups or data sets already
    exists, they are kept as they are, as long as the data sets have
    the right size and data type. Incompatible data sets will be
    overwritten.
    Stitching file has the following structure:

    __________________Groups____________|___Data sets____

    gene1:
          StitchedImage:
                                        final_image
                                        final_image_ubyte
                                        blending_mask
                                        temp_mask
          StitchingTempTiles:
                         blended_tiles:
                                        0
                                        1
                                        .
                                        .
                                        n
                         ubytes:
                                        0
                                        1
                                        .
                                        .
                                        n
                         temp_masks:
                                        0
                                        1
                                        .
                                        .
                                        n
   gene2: ...



    Parameters:
    -----------

    stitching_file: pointer
        HDF5 file handle. The file where the stitched images will be saved.
    joining: dict
        Dictionary containing keys 'corner_list  and 'final_image_shape'. 
        Corner_list is a list of list, each list is a pair of an image number
        (int) and it's coordinates (numpy array containing floats).
        Final_image_shape is a tuple of size 2 or 3 depending on the number of 
        dimensions and contains ints.
    nr_pixels: int
        Height and length of the tile in pixels, tile is assumed to be square.
    z_size: int 
        The number of layers in one tile (size of the z-axis). 
        Should be 1 or None if the tile is 2D.
    hyb_nr: int 
        The number of the hybridization we are stitching. 
        This will be used to place the data in the right group in stitching_file.
    gene: str
        The name of the gene we are stitching.This will be used to place the
        data in the right group in stitching_file.
    ubyte: bool 
        We need to create a data set for the ubyte image when True.
        Otherwise we only need to create a data set for the high resolution image.
    blend: str 
        When 'non linear' or 'linear', blending will be applied, so we will 
        need to create the structures necessary for saving the blended tiles. When
        it has another value or is None no blending at all will be applied, 
        so we can skip this. This variable also determines to return value of
        linear_blending.
    reg_name: str 
        A name that will be used to create a second StitchedImage group for 
        an image that is registered to be aligned with the other hybridizations. 
        warp_name will be attached to the end of the group name, which is
        StitchedImage.
    
    Returns:
    --------

    stitched_group: pointer
        HDF5 reference to the group where the final will be.
    temp_group: pointer
        HDF5 reference to the group where the blended tiles will be saved.
    linear_blending: bool
        When True later blending should be linear and when False, blending
        should be non-linear.
    ubyte: bool
        The ubyte image should be saved when True. Otherwise only the high
        resolution image should be saved.
    blend: str 
        When 'non linear' or 'linear', blending should be applied. When
        it has another value or is None no blending at all will be applied.


    """
    logger.info("Generating stitching file structures.")
    # Create a group for the stitched images in the stitching file
    #stitching_file.require_group('Hybridization' + str(hyb_nr))
    stitching_file.require_group(gene)
    stitched_group = stitching_file[gene].require_group('StitchedImage' + reg_name)
    # Create the final image in this file
    try:
        final_image = stitched_group.require_dataset('final_image',
                                                joining['final_image_shape'],
                                                dtype = np.float64)
    except TypeError as err:
        logger.info("Incompatible 'final_image' data set already existed, deleting old dataset.\n {}"
                    .format(err))
        del stitched_group['final_image']
        inout.free_hdf5_space(stitching_file)
        final_image = stitched_group.require_dataset('final_image',
                                                joining['final_image_shape'],
                                                dtype = np.float64)
    # Create a low resolution image
    if ubyte:
        try:
            final_image_ubyte = stitched_group.require_dataset('final_image_ubyte',
                                                       joining['final_image_shape'],
                                                       dtype=np.uint8)
        except TypeError as err:
            logger.info("Incompatible 'final_image_ubyte' data set already existed, deleting old dataset.\n {}"
                        .format(err))
            del stitched_group['final_image_ubyte']
            inout.free_hdf5_space(stitching_file)
            final_image = stitched_group.require_dataset('final_image_ubyte',
                                                       joining['final_image_shape'],
                                                       dtype=np.uint8)

    # If blending is required initialize the blending mask in the
    # hdf5 file
    if blend is not None:
        # For the blending masks use only the last 2 dimensions of final
        # image shape, because also when working in 3D the masks can be
        # 2D as there is the same shift in x and y direction for the
        # whole stack.
        try:
            blending_mask = stitched_group.require_dataset('blending_mask',
                                                   joining['final_image_shape'][-2:],
                                                   dtype = np.float64)
        except TypeError as err:
            logger.info("Incompatible 'blending_mask' data set already existed, deleting old dataset.\n {}"
                        .format(err))
            del stitched_group['blending_mask']
            inout.free_hdf5_space(stitching_file)
            final_image = stitched_group.require_dataset('blending_mask',
                                                   joining['final_image_shape'][-2:],
                                                   dtype = np.float64)
        try:
            temp_mask = stitched_group.require_dataset('temp_mask',
                                               joining['final_image_shape'][-2:],
                                               dtype = np.float64)
        except TypeError as err:
            logger.info("Incompatible 'temp_mask' data set already existed, deleting old dataset.\n {}"
                        .format(err))
            del stitched_group['temp_mask']
            inout.free_hdf5_space(stitching_file)
            final_image = stitched_group.require_dataset('temp_mask',
                                               joining['final_image_shape'][-2:],
                                               dtype = np.float64)


    # Check type of blending
    if blend == 'non linear':
        linear_blending = False
    elif blend == 'linear':
        linear_blending = True
    else:
        linear_blending = False
        logger.warning("Blend not defined correctly, \
                                using non-linear blending, \
                                blend is: {}".format(blend))

    # Generate the tmp_file structure
    temp_group = stitching_file[gene].require_group('StitchingTempTiles' + reg_name)
    blended_tiles = temp_group.require_group('blended_tiles')
    ubytes = temp_group.require_group('ubytes')
    temporary_masks = temp_group.require_group('temp_masks')

    for idx in range(len(joining['corner_list'])):
            if z_size == 1 or z_size is None:
                try:
                    blended_tiles.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64)
                except TypeError as err:
                    logger.debug("Incompatible data set for blended tile {}, deleting old dataset.\n {}"
                                .format(idx, err))
                    del blended_tiles[str(idx)]
                    inout.free_hdf5_space(stitching_file)
                    blended_tiles.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64)
                try:
                    ubytes.require_dataset(str(idx), (nr_pixels, nr_pixels), np.uint8)
                except TypeError as err:
                    logger.debug("Incompatible data set for ubyte tile {}, deleting old dataset.\n {}"
                                .format(idx, err))
                    del ubytes[str(idx)]
                    inout.free_hdf5_space(stitching_file)
                    ubytes.require_dataset(str(idx), (nr_pixels, nr_pixels), np.uint8)
            else:
                try:
                    blended_tiles.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.float64)
                except TypeError as err:
                    logger.debug("Incompatible data set for blended tile {}, deleting old dataset.\n {}"
                                .format(idx, err))
                    del blended_tiles[str(idx)]
                    inout.free_hdf5_space(stitching_file)
                    blended_tiles.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.float64)
                try:
                    ubytes.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.uint8)
                except TypeError as err:
                    logger.debug("Incompatible data set for ubyte tile {}, deleting old dataset.\n {}"
                                .format(idx, err))
                    del ubytes[str(idx)]
                    inout.free_hdf5_space(stitching_file)
                    ubytes.require_dataset(str(idx), (z_size, nr_pixels, nr_pixels), np.uint8)

            try:
                temporary_masks.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64)
            except TypeError as err:
                logger.debug("Incompatible data set for temporary masks tile {}, deleting old dataset.\n {}"
                                .format(idx, err))
                del temporary_masks[str(idx)]
                inout.free_hdf5_space(stitching_file)
                temporary_masks.require_dataset(str(idx), (nr_pixels, nr_pixels), np.float64)
    if False:
        logger.info("Flushing hdf5 file to clean up after delete operations")
        before_flush = stitching_file.id.get_filesize()
        stitching_file.flush()
        after_flush = stitching_file.id.get_filesize()
        logger.debug("Size in bytes before flush: {} after flush: {} space freed: {}".format(before_flush, after_flush, before_flush - after_flush))
    return stitched_group, temp_group, linear_blending, ubyte, blend


[docs]def fill_hdf5_files(im_file, joining, nr_pixels, ubyte, blend):
    """
    Fill the data sets in the hdf5 file with zeros or random data.

    Parameters:
    -----------

    im_file: pointer
        HDF5 reference. Reference to the StitchedImage
        group or any group or file that contains final_image
        and optionally final_image_ubyte, blending_mask and
        temp_mask.
    joining: dictionary 
        Containing the key final_image_shape.
        Final_image_shape is a tuple of size 2 or 3 depending on
        the number of dimensions and contains ints.
    nr_pixels: int
        Height and length of the tile in pixels, tile is assumed to be square.
    ubyte: bool
        The ubyte image will be filled when True.
        Otherwise only the high resolution image will be saved.
    blend: str 
        When blend is not None, blending_mask and temp_mask will be filled.
    """
    logger.info("Filling stitching file data sets.")
    # Fill the image arrays with zeros, this works for 3D
    im_file['final_image'][:]= np.zeros(joining['final_image_shape'])
    if ubyte:
        im_file['final_image_ubyte'][:] = np.empty(joining['final_image_shape'],
                                           dtype=np.uint8)
    if blend is not None:
        im_file['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:])
        tilejoining.make_mask(joining, nr_pixels, im_file['blending_mask'])
        im_file['temp_mask'][:] = np.zeros(joining['final_image_shape'][-2:])


[docs]def divide_final_image(im_shape, nr_blocks):
    """
    Divide the image in rows if it is too large to be filled at once.

    The parallel HDF5 implementation cannot handle images larger
    than 2GB. Therefore this function can divide the rows of the image
    over nr_blocks to be filled by separate cores.

    Parameters:
    -----------

    im_shape: tuple
        Shape (y, x) of the image to be filled.
    nr_blocks: int
        The number of block to divide the image in,
        typically this will be the number of available cores.

    Returns:
    --------

    y_corners: list
        List of numpy ints (numpy.int64). The y-corners of all the blocks.
    """

    logger.debug("nr_blocks {}".format(nr_blocks))
    logger.debug("im_shape {}".format(im_shape[-2]))
    y_corners = np.linspace(0, im_shape[-2], num = nr_blocks,
                        endpoint = False, dtype = int)
    logger.debug("y_corners {}".format(y_corners))
    logger.debug('Types: y_corners: {} corner element: {}'
                    .format(type(y_corners), type(y_corners[0])))
    return y_corners


[docs]def fill_hdf5_MPI(y_corners, ind, im_shape, im_file, ubyte, blend):
    """Fill the data sets in the hdf5 file with zeros, block by block.

    The parallel HDF5 implementation cannot handle images larger
    than 2GB. Therefore this function only fills the part of the
    image from the indexing value found in y_corners[ind] up to
    y_corners[ind + 1].

    Parameters:
    -----------

    y_corners: list
        List of numpy ints (numpy.int64). The y-corners of all the blocks.
    ind : int 
        The index of the corner in y_corners
    im_shape: tuple
        Shape of the final image.
    im_file: pointer 
        HDF5 file handle or group reference. The file or
        group with the datasets that need to be filled.
    ubyte: bool
        Ubyte image is filled if True
    blend: str 
        If this is None datasets needed for blending
        are left empty, for all other values they are filled.
    """
    logger.info("Filling stitching file data sets in MPI mode.")
    # Calculate the range to be filled with zeros
    y_min = y_corners[ind]
    if (ind + 1) < len(y_corners):
        y_max = y_corners[ind + 1]
    else:
        y_max = im_shape[-2]

    logger.debug("index: {} y_min: {} y_max: {} im_shape[0]: {}"
                .format(ind, y_min, y_max, im_shape[-2]))

    # Fill with zeros:
    if len(im_shape) == 3:
        im_file['final_image'][:, y_min:y_max,:] = 0.0
        if ubyte:
            im_file['final_image_ubyte'][:, y_min:y_max,:] = 0
    else:
        im_file['final_image'][y_min:y_max,:] = 0.0
        if ubyte:
            im_file['final_image_ubyte'][y_min:y_max,:] = 0
    if blend is not None:
        im_file['blending_mask'][y_min:y_max,:] = 0.0
        im_file['temp_mask'][y_min:y_max,:] = 0.0

    #DO NOT FORGET to run tilejoining.make_mask after filling the arrays!!!!


########################### HDF5 file creation ##########################
[docs]def create_structures_hdf5_stitched_ref_gene_file_npy(stitching_file, joining, nr_pixels,
                                 reference_gene, blend = 'non linear'):
    """Takes an HDF5 file handle and creates the necessary structures.

    Modification of create_structures_hdf5_files to work with .npy list of
    files

    Creates groups and data sets, when the groups or data sets already
    exists, they are kept as they are, as long as the data sets have
    the right size and data type. Incompatible data sets will be
    overwritten.
    Stitching file has the following structure:

    __________________Groups____________|___Data sets____

    gene_stitched:
          StitchedImage:
                                        final_image
                                        blending_mask


    Parameters:
    -----------

    stitching_file: pointer
        HDF5 file handle. The file where the stitched images will be saved.
    joining: dict 
        Dictionary containing keys 'corner_list  and 'final_image_shape'. 
        Corner_list is a list of list, each list is a pair of an image number
        (int) and it's coordinates (numpy array containing floats).
        Final_image_shape is a tuple of size 2 or 3
        depending on the number of dimensions and contains ints.
    nr_pixels: int 
        Height and length of the tile in pixels, tile is assumed to be square.
    reference_gene: str 
        The name of the gene we are stitching.This will be used to place the
        data in the right group in stitching_file.
     blend: str
        When 'non linear' or 'linear',blending will be applied,
        so we will need to create the structures
        necessary for saving the blended tiles. When
        it has another value or is None no blending at
        all will be applied, so we can skip this.
        This variable also determines to return value of
        linear_blending.
    
    Returns:
    --------

    stitched_group: pointer
        HDF5 reference to the group where the final will be.
    linear_blending: bool
        When True later blending should be linear and when False, blending
        should be non-linear.
    blend: str 
        When 'non linear' or 'linear', blending should be applied. When
        it has another value or is None no blending at
        all will be applied.


    """
    logger.info("Generating stitching file structures.")
    # Create a group for the stitched images in the stitching file
    
    stitching_file.require_group(reference_gene)
    stitched_group = stitching_file[reference_gene].require_group('StitchedImage')
    # Create the final image in this file
    try:
        final_image = stitched_group.require_dataset('final_image',
                                                joining['final_image_shape'],
                                                dtype = np.float64)
    except TypeError as err:
        logger.info("Incompatible 'final_image' data set already existed, deleting old dataset.\n {}"
                    .format(err))
        del stitched_group['final_image']
        inout.free_hdf5_space(stitching_file)
        final_image = stitched_group.require_dataset('final_image',
                                                joining['final_image_shape'],
                                                dtype = np.float64)
    

    # If blending is required initialize the blending mask in the
    # hdf5 file
    if blend is not None:
        # For the blending masks use only the last 2 dimensions of final
        # image shape, because also when working in 3D the masks can be
        # 2D as there is the same shift in x and y direction for the
        # whole stack.
        try:
            blending_mask = stitched_group.require_dataset('blending_mask',
                                                   joining['final_image_shape'][-2:],
                                                   dtype = np.float64)
        except TypeError as err:
            logger.info("Incompatible 'blending_mask' data set already existed, deleting old dataset.\n {}"
                        .format(err))
            del stitched_group['blending_mask']
            inout.free_hdf5_space(stitching_file)
            final_image = stitched_group.require_dataset('blending_mask',
                                                   joining['final_image_shape'][-2:],
                                                   dtype = np.float64)

    # Check type of blending
    if blend == 'non linear':
        linear_blending = False
    elif blend == 'linear':
        linear_blending = True
    else:
        linear_blending = False
        logger.warning("Blend not defined correctly, \
                                using non-linear blending, \
                                blend is: {}".format(blend))

    
    if False:
        logger.info("Flushing hdf5 file to clean up after delete operations")
        before_flush = stitching_file.id.get_filesize()
        stitching_file.flush()
        after_flush = stitching_file.id.get_filesize()
        logger.debug("Size in bytes before flush: {} after flush: {} space freed: {}".format(before_flush, after_flush, before_flush - after_flush))
    return stitched_group, linear_blending, blend