Source code for ncempy.io.emdVelox

""" Provides an interface to Velox EMD datasets. Not to be confused with
Berkeley EMD data sets (see emd.py) instead.

The reader for EMD Berkeley and Velox files will be combined in the near
future once they are fully tested separately.

Currently limited to images only. This module cannot load spectra.

Note
----
General users:
    Use the simplified emdVelox.emdVeloxReader() function to load the data and
    metadata as a python dictionary.

Advanced users and developers:
    Access the file internals through the emdVelox.fileEMDVelox() class.
"""

import json
import datetime
from pathlib import Path
import numpy as np
import h5py


class fileEMDVelox:
    """ Class to represent Velox EMD files. It uses the h5py caching
    functionality to increase the default cache size from 1MB to 10MB. This
    significantly improves file reading for EMDVelox files which are written
    with Fortran-style ordering and an inefficient choice of chunking.

    Attributes
    ----------
    list_data : list
        A list containing each h5py data group that can be loaded.

    _file_hdl : h5py.File
        The file handle from h5py.File.

    metaDataJSON : dict
        The full metadata for the most recently loaded data set. Note that you
        have to load a data set for this to be populated or run parseMetaData(num).

    file_name : str
        The name of the file.

    file_path : pathlib.Path
        A pathlib.Path object for the open file.

    Examples
    --------
    Open an EMD Velox file containing 1 image.

    >> import ncempy.io as nio
    >> with nio.emdVelox.fileEMDVelox('1435 1.2 Mx STEM HAADF-DF4-DF2-BF.emd') as emd1:
    >>     print(emd1)  # print information about the file
    >>     im0, metadata0 = emd1.get_dataset(0)

    """

    def __init__(self, filename):
        """ Init opening the file and finding all data groups.
        Currently only searches the /Data/Image group.

        Parameters
        ----------
        filename : str or pathlib.Path
            The file path to load as a string or a pathlib.Path object.

        """

        # necessary declaration in case something goes wrong
        self._file_hdl = None
        self.file_name = None
        self.file_path = None
        self.metaDataJSON = None
        self.list_data = None
        self.list_emds = None  # this will be identical to list_data

        if hasattr(filename, 'read'):
            try:
                self.file_path = Path(filename.name)
                self.file_name = self.file_path.name
            except AttributeError:
                self.file_path = None
                self.file_name = None
        else:
            # check filename type, change to pathlib.Path
            if isinstance(filename, str):
                filename = Path(filename)
            elif isinstance(filename, Path):
                pass
            else:
                raise TypeError('Filename is supposed to be a string or pathlib.Path or file object')
            self.file_path = filename
            self.file_name = self.file_path.name

        # try opening the file with a 10 MB chunk cache
        try:
            self._file_hdl = h5py.File(filename, 'r', rdcc_nbytes=10485760)  # rdcc_nbytes = 10*1024**2
        except IOError:
            print('Error opening file: "{}"'.format(filename))
            raise

        self._find_groups()

    def __del__(self):
        """ Destructor for EMD file object. Closes the h5py file.

        """
        # close the file if it was opened successfully
        if self._file_hdl is not None:
            self._file_hdl.close()

    def __enter__(self):
        """ Implement python's with statement.

        """
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """ Implement python's with statement and close the file
        using __del__().

        """
        self.__del__()
        return None

    def __str__(self):
        """ Print out the detectors used to take the data and the pixel size
        to help with telling users about the data in the file.

        """
        out = 'EMD file contains {} data sets\n'.format(len(self.list_data))
        md = {'pixelSize': 1, 'detectorName': 'unknown'}
        for ii, group in enumerate(self.list_data):
            md = self.parseMetaData(group)
            out += 'Dataset #{} from detector: {}\n'.format(ii, md['detectorName'])
            out += 'pixel size = ({0[0]:0.4f}, {0[1]:0.4f}) nm\n'.format(md['pixelSize'])
        return out

    def _find_groups(self):
        """ Find all groups that contain image data.

        Note
        ----
        This currently only finds images.

        """
        try:
            # Get all of the groups in the Image group
            self.list_data = list(self._file_hdl['Data/Image'].values())
        except KeyError:
            self.list_data = []
            raise
        self.list_emds = self.list_data  # make a copy to match the Berkeley EMD attribute
    def get_dataset(self, group, memmap=False):
        """ Get the data from a group and the associated metadata.

        Parameters
        ----------
        group : HDF5 dataset or int
            The link to the HDF5 dataset in the file or an integer for the
            number of the dataset. The list of datasets is held in the
            list_data attribute populated on class init.

        memmap : bool, default = False
            If False (default), then a numpy ndarray is returned. If True the
            HDF5 data set object is returned and data is loaded from disk as needed.

        Returns
        -------
        : tuple (ndarray or HDF5 dataset, dict)
            A tuple containing the data as a ndarray or a HDF5 dataset object.
            The second argument is a python dict of metadata.

        """
        # check input
        try:
            if isinstance(group, int):
                group = self.list_data[group]
        except IndexError:
            raise IndexError('EMDVelox group #{} does not exist.'.format(group))

        if not isinstance(group, h5py.Group):
            raise TypeError('group needs to refer to a valid HDF5 group!')

        if memmap:
            data = group['Data']  # return the HDF5 dataset object
        else:
            data = np.squeeze(group['Data'][:])  # load the full data set

        metaData = self.parseMetaData(group)

        return data, metaData
    def parseMetaData(self, group):
        """ Parse metadata in a data group. Determines the pixelSize and
        detector name. The EMDVelox data sets have extensive metadata stored
        as a JSON type string.

        Parameters
        ----------
        group : h5py.Group or int
            The h5py group to load the metadata from, which is easily retrieved
            from the list_data attribute. If the input is an int then the group
            corresponding to the list_data attribute is used. The string metadata
            is loaded and parsed by the json module into a dictionary.

        Returns
        -------
        md : dict
            The JSON information in the file returned as a python dictionary.

        """
        try:
            if type(group) is int:
                group = self.list_data[group]
        except IndexError:
            raise IndexError('EMDVelox group #{} does not exist.'.format(group))

        md = {}
        tempMetaData = group['Metadata'][:, 0]

        # Reduce to valid metadata
        validMetaDataIndex = np.where(tempMetaData > 0)
        metaData = tempMetaData[validMetaDataIndex].tobytes()

        # Interpret as UTF-8 encoded characters and load as JSON
        self.metaDataJSON = json.loads(metaData.decode('utf-8', 'ignore'))

        # Pull out basic meta data about the images
        md['pixelUnit'] = [self.metaDataJSON['BinaryResult']['PixelUnitX'],
                           self.metaDataJSON['BinaryResult']['PixelUnitY']]

        convert_pixel_sizeX = 1
        convert_pixel_sizeY = 1
        if md['pixelUnit'][0] == 'm':
            convert_pixel_sizeX = 1e9
            md['pixelUnit'][0] = 'nm'
        if md['pixelUnit'][1] == 'm':
            convert_pixel_sizeY = 1e9
            md['pixelUnit'][1] = 'nm'
        md['pixelSizeUnit'] = md['pixelUnit']  # Keep this metadata key for legacy purposes

        pixelSizeX = float(self.metaDataJSON['BinaryResult']['PixelSize']['width']) * convert_pixel_sizeX  # convert
        pixelSizeY = float(self.metaDataJSON['BinaryResult']['PixelSize']['height']) * convert_pixel_sizeY  # to nm

        # Construct meta data dictionary with most useful metadata
        md['pixelSize'] = (pixelSizeX, pixelSizeY)
        md['AcquisitionTime'] = datetime.datetime.fromtimestamp(int(
            self.metaDataJSON['Acquisition']['AcquisitionStartDatetime']['DateTime']))
        md['Stage'] = self.metaDataJSON['Stage']
        md['detectorName'] = self.metaDataJSON['BinaryResult']['Detector']
        try:
            md['dwellTime'] = self.metaDataJSON['Scan']['DwellTime']  # only for STEM
        except KeyError:
            md['dwellTime'] = 0

        return md
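# Illustrative sketch (not part of the ncempy API; the function name, arguments
# and default region are hypothetical): the memmap=True option of
# fileEMDVelox.get_dataset() returns the h5py dataset object, so only the
# slices you index are read from disk. This can be used to pull a small region
# of interest out of a large image without loading the full array into memory.
def _example_read_roi(filename, dset_num=0, roi=(slice(0, 512), slice(0, 512))):
    with fileEMDVelox(filename) as emd0:
        dset, metadata = emd0.get_dataset(dset_num, memmap=True)
        # Only the requested slice is read from disk; squeeze removes any
        # singleton dimensions (e.g. a single-frame image stack).
        region = np.squeeze(dset[roi])
    return region, metadata
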
def emdVeloxReader(filename, dsetNum=0):
    """ A simple helper function to read in the data and metadata in a
    structured format similar to the other ncempy readers.

    Note
    ----
    Not fully implemented yet. Work in progress. Important metadata is
    missing, but you can get the data.

    Parameters
    ----------
    filename : str or pathlib.Path
        The path to the file.

    dsetNum : int, default = 0
        The index of the data set to load.

    Returns
    -------
    : dict
        Data and metadata as a dictionary similar to other ncempy readers.

    Example
    -------
    Load all data and metadata from a data set in an EMD file

    >> import ncempy.io as nio
    >> emd0 = nio.emdVelox.emdVeloxReader('filename.emd', dsetNum=0)

    """
    with fileEMDVelox(filename) as emd0:
        d, md = emd0.get_dataset(dsetNum)

    out = {'data': d, 'filename': filename}
    out.update(md)

    return out
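
if __name__ == '__main__':
    # Minimal command line demo (illustrative only; ncempy does not install this
    # module as a script): load the first image data set of an EMD Velox file
    # given on the command line and print a few pieces of metadata using the
    # emdVeloxReader() helper defined above.
    import sys

    if len(sys.argv) < 2:
        print('usage: python emdVelox.py <file.emd>')
    else:
        emd_out = emdVeloxReader(sys.argv[1], dsetNum=0)
        print('data shape = {}'.format(emd_out['data'].shape))
        print('pixel size = {} {}'.format(emd_out['pixelSize'], emd_out['pixelUnit']))
        print('detector = {}'.format(emd_out['detectorName']))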