"""
A module to read MRC files in python and numpy.
Written according to MRC specification at http://bio3d.colorado.edu/imod/betaDoc/mrc_format.txt
Also works with FEI MRC files which include a special header block with experimental information.
Note
----
General users:
Use the simplified mrc.mrcReader() function to load the data and meta
data as a python dictionary.
Advanced users and developers:
Access the file internals through the mrc.fileMRC() class.
"""
from pathlib import Path
import numpy as np
[docs]class fileMRC:
""" Read in the data in MRC format and other useful information like metadata. Follows the specification
published at http://bio3d.colorado.edu/imod/betaDoc/mrc_format.txt
Attributes
----------
file_name : str
The name of the file
file_path : pathlib.Path
A pathlib.Path object for the open file
fid : file
The file handle to the opened MRC file.
mrcType : int
The internal MRC data type.
dataType : np.dtype
The numpy dtype corresponding to the mrcType.
dataSize : np.ndarray
The number of pixels along each dimension. Corresponds to the shape attribute of a np.ndarray
gridSize : np.ndarray
The size of the grid. Usually the same as dataSize
volumeSize : np.ndarray
The size of the volume along each direction in Angstroms.
voxelSize : np.ndarray
The size of the voxel along each direction in Angstroms.
cellAngles : np.ndarray
The angles of the cell. Ignored in most cases including ncempy.
axisOrientations : np.ndarray
Mapping the orientations of the data to real space directions X, Y, Z. Ignored by ncempy
minMaxMean : np.ndarray
The minimum, maximum and mean value of the data to avoid computing this every time.
extra : np.ndarray
Extra mbinary metadata if it exists.
FEIinfo : dict
A dictionary of metadata used by FEI (Thermo Fischer) microsocpes for important metadata. This metadata
overwrites the voxelsize attribute if it exists.
dataOffset : int
The integer offset in bytes to the start of the raw data.
dataOut: dict
Will hold the data and metadata to output to the user after getDataset() call.
v : bool
More output for debugging. False by default
Examples
--------
Read in all data and metadata into memory.
>> import ncempy.io as nio
>> mrc0 = nio.mrc.mrcReader('file.mrc')
Low level operations to get 1 slice of the 3D data
>> import ncempy.io as nio
>> with nio.mrc.fileMRC('file.mrc') as f1:
>> single_slice = f1.getSlice(0)
"""
def __init__(self, filename, verbose=False):
"""
Parameters
-----------
filename : str or pathlib.Path or file object
String or pathlib.Path of file object pointing to the filesystem location of the file.
verbose : bool
If True, debug information is printed.
"""
# check filename type
if hasattr(filename, 'read'):
self.fid = filename
try:
self.file_path = Path(self.fid.name)
self.file_name = self.file_path.name
except AttributeError:
self.file_name = None
self.file_path = None
else:
if isinstance(filename, str):
filename = Path(filename)
elif isinstance(filename, Path):
pass
else:
raise TypeError('Filename is supposed to be a string or pathlib.Path or readable file object')
self.file_path = filename
self.file_name = filename.name
# Open the file and quit if the file does not exist
try:
self.fid = open(self.file_path, 'rb')
except IOError as e:
print("I/O error({0}): {1}".format(e.errno, e.strerror))
# necessary declarations, if something fails
self.mrcType = None
self.dataType = None
self.dataSize = None
self.gridSize = None
self.volumeSize = None
self.voxelSize = None
self.cellAngles = None
self.axisOrientations = None
self.minMaxMean = None
self.extra = None
self.FEIinfo = None
self.dataOffset = None
self.dataOut = {} # will hold the data and metadata to output to the user after getDataset() call
# Add a top level variable to indicate verbose output for debugging
self.v = verbose
self.parseHeader()
def __del__(self):
"""Close the file.
"""
if not self.fid.closed:
if self.v:
print('Closing input file: {}'.format(str(self.file_path)))
self.fid.close()
return None
def __enter__(self):
"""Implement python's with statement
"""
return self
def __exit__(self, exception_type, exception_value, traceback):
"""Implement python's with statement.
Close the file using __del__
"""
self.__del__()
return None
[docs] def getDataset(self):
"""Read in the full data block and reshape to an ndarray
with C-style ordering.
"""
self.fid.seek(self.dataOffset, 0) # move to the start of the data from the start of the file
try:
num0 = int(np.prod(self.dataSize, dtype=np.uint64))
data1 = np.fromfile(self.fid, dtype=self.dataType, count=num0)
self.dataOut['data'] = data1.reshape(self.dataSize)
except MemoryError:
print("Not enough memory to read in the full data set. Use getMemmap")
return self.dataOut
[docs] def getSlice(self, num):
"""Read in a slice of an MRC file. Useful for parsing through a large file without reading
the entire data set into memory.
Parameters
----------
num : int
Get the requested image.
Returns
-------
out : ndarray
A 2D slice or a 3D set of slices along the first index
Raises
------
IndexError
If num > the number of slices.
"""
# Check num is within the data array size bounds
if num > (self.dataSize[0] - 1):
raise IndexError('Index {} is out of bounds for array with size {}'.format(num, self.dataSize[0]))
self.fid.seek(self.dataOffset, 0) # move to the start of the data by skipping the header
imSize = self.dataSize[1] * self.dataSize[2] # size of each image in pixels
byteSize = int(np.dtype(self.dataType).itemsize * imSize) # make sure this is an int32
self.fid.seek(num * byteSize, 1) # skip to the slice requested from the start of the data
data1 = np.fromfile(self.fid, dtype=self.dataType, count=imSize) # read in the requested image
data1 = data1.reshape((self.dataSize[1], self.dataSize[2])) # reshape the image
return data1
[docs] def getMemmap(self):
"""Return a numpy memmap object (read-only) for the dataset. This is very useful
for very large datasets to avoid loading the entire data set into memory. No meta data is
returned.
Returns
--------
: numpy.core.memmap
A read-only numpy memmap object with access to the data on disk.
"""
mm = np.memmap(self.fid, dtype=self.dataType, mode='r', offset=self.dataOffset,
shape=tuple(self.dataSize))
return mm
def _applyAxisOrientations(self, arrayIn):
""" This is untested and unused.
"""
return [arrayIn[x - 1] for x in self.axisOrientations]
def _getMRCType(self, dataType):
"""Return the correct data type according to the official MRC type list:
0 image : signed 8-bit bytes range -128 to 127
1 image : 16-bit
2 image : 32-bit reals
3 transform : complex 16-bit integers
4 transform : complex 32-bit reals
6 image : unsigned 16-bit range 0 to 65535
Parameters
----------
dataType: int
The data type value encoded in an MRC header
Returns
--------
out: numpy dtype
The corresponding numpy data type.
"""
if dataType == 0:
Type = np.int8
elif dataType == 1:
Type = np.int16
elif dataType == 2:
Type = np.float32
elif dataType == 6:
Type = np.uint16
else:
print("Unsupported data type" + str(dataType)) # complex data types are currently unsupported
Type = None
return Type
[docs]def mrcReader(file_name):
"""A simple function to read open a MRC, parse the header, and read the full
data set.
Parameters
----------
file_name : str or pathlib.Path
The path to the file to load.
Returns
-------
out: dict
A dictionary containing the data and interesting metadata. The data is attached to the 'data' key.
Example
-------
Read in all data from disk into memory. This assumes the dataset is 3 dimensional:
>> from ncempy.io.mrc import mrcReader
>> import matplotlib.pyplot as plt
>> mrc1 = mrcReader('filename.mrc')
>> plt.imshow(mrc1['data'][0, :, :]) # show the first image in the data set
"""
if isinstance(file_name, str):
file_name = Path(file_name)
with fileMRC(file_name) as f1: # open the file and init the class
im1 = f1.getDataset() # read in the dataset
# Add extra meta data not already in im1
im1['filename'] = file_name.name
im1['pixelUnit'] = 'A'
return im1 # return the data and metadata as a dictionary
[docs]def mrc2raw(file_name):
""" Convert an MRC type file to raw binary. The entire data is read into memory and then written to a new
raw file in the same location with the data type and shape (C-ordering) written into the filename. No other
meta data is retained.
Parameters
----------
file_name : str or pathlib.Path
The file name to convert.
"""
if isinstance(file_name, str):
file_name = Path(file_name)
tomo = mrcReader(file_name)
# Add metadata to file name
out_name = file_name.stem + '_{}_{}.raw'.format(tomo['data'].dtype, tomo['data'].shape)
outPath = file_name.with_name(out_name.replace(' ', ''))
print(outPath)
with open(outPath, 'wb') as f0:
f0.write(tomo['data']) # write out as C ordered data
[docs]def mrc2emd(file_name):
"""Write an MRC file as an HDF5 file in EMD format with same file name and .emd ending.
Header information is retained as attributes.
Parameters
----------
file_name: str
The name of the file to convert from MRC to EMD format.
Returns
-------
out: int
1 if successful.
TODO
-----
Update this to use ncempy.emd class
"""
import h5py
# Read in the MRC data and reshape to C-style ordering
tomo = mrcReader(file_name)
# create the HDF5 file
with h5py.File(file_name.rsplit('.mrc', 1)[0] + '.emd', 'w') as f1: # w- will error if the file exists
# Create the axis vectors in nanometers. Standard MRC pixel size is in Angstroms
xFull = np.linspace(0, tomo['voxelSize'][0] * tomo['data'].shape[0] - 1, tomo['data'].shape[0])
yFull = np.linspace(0, tomo['voxelSize'][1] * tomo['data'].shape[1] - 1, tomo['data'].shape[1])
zFull = np.linspace(0, tomo['voxelSize'][2] * tomo['data'].shape[2] - 1, tomo['data'].shape[2])
# Root data group
dataTop = f1.create_group('data')
# Create tilt series group
tiltseriesGroup = dataTop.create_group('data')
tiltseriesGroup.attrs['emd_group_type'] = np.int8(1)
# Save the data to the EMD file and reshape it to a C-style array
try:
_ = tiltseriesGroup.create_dataset('data', data=tomo['data'], compression='gzip', shuffle=True)
except MemoryError:
raise MemoryError("Not enough memory to write out data to EMD file")
dim1 = tiltseriesGroup.create_dataset('dim1', data=xFull)
dim1.attrs['name'] = np.string_('x')
dim1.attrs['units'] = np.string_('')
dim2 = tiltseriesGroup.create_dataset('dim2', data=yFull)
dim2.attrs['name'] = np.string_('y')
dim2.attrs['units'] = np.string_('')
dim3 = tiltseriesGroup.create_dataset('dim3', data=zFull)
dim3.attrs['name'] = np.string_('z')
dim3.attrs['units'] = np.string_('')
# Create the other groups
scopeGroup = f1.create_group('Microscope')
scopeGroup.attrs['voxel sizes'] = tomo['voxelSize']
userGroup = f1.create_group('User')
commentGroup = f1.create_group('Comments')
[docs]def mrcWriter(filename, data, pixelSize, forceWrite=False):
"""Write out a MRC type file according to the specification at http://bio3d.colorado.edu/imod/doc/mrc_format.txt
Parameters
----------
filename : str or pathlib.Path
The name or Path of the file to write out to.
data : ndarray
The array data to write to disk.
pixelSize : tuple
The size of the pixel along each direction (in Angstroms) as a 3 element vector (sizeZ,sizeY,sizeX).
forceWrite : bool
This will write the data as a C-contiguous array. It is not suggested to use this option and it will
be removed in future versions.
"""
with open(filename, 'wb') as fid:
if len(data.shape) > 3:
print("Too many dimensions")
return
if not data.flags['C_CONTIGUOUS']:
print("Error: Array must be C-style ordering: [numImages,Y,X]. "
"Use numpy.tranpspose and np.ascontiguousarray to change data ordering in memory")
print('Exiting')
return
# initialize the header with 256 zeros with size 4 bytes
header = np.zeros(256, dtype=np.int32)
fid.write(header)
fid.seek(0, 0) # return to the beginning of the file
# Initialize the int32 part of the header
header1 = np.zeros(10, dtype=np.int32)
# Write the number of columns, rows and sections (images)
# header1[0:3] = np.int32(dims) #data size in pixels
header1[0] = np.int32(data.shape[2]) # num columns, the last index in C-style ordering
header1[1] = np.int32(data.shape[1]) # num rows
header1[2] = np.int32(data.shape[0]) # num sections (images)
if data.dtype == np.float32:
header1[3] = np.int32(2)
elif data.dtype == np.uint16:
header1[3] = np.int32(6)
elif data.dtype == np.int16:
header1[3] = np.int32(1)
elif data.dtype == np.int8:
header1[3] = np.int32(0)
else:
print("Data type {} is unsupported. Only int8, int16, uint16, and float32 are supported".format(data.dtype))
return
# Starting point of sub image (not used in IMOD)
header1[4:7] = np.zeros(3, dtype=np.int32)
# Grid size in X,Y,Z
# header1[7:10] = np.int32(dims); #data size in pixels
header1[7] = np.int32(data.shape[2]) # mx
header1[8] = np.int32(data.shape[1]) # my
header1[9] = np.int32(data.shape[0]) # mz
# Write out the first part of the header information
fid.write(header1)
# Cell dimensions (in Angstroms)
# pixel spacing = xlen/mx, ylen/my, zlen/mz
fid.write(np.float32(pixelSize[2] * data.shape[2])) # xlen
fid.write(np.float32(pixelSize[1] * data.shape[1])) # ylen
fid.write(np.float32(pixelSize[0] * data.shape[0])) # zlen
# Cell angles (in degrees)
fid.write(np.float32([90.0, 90.0, 90.0]))
# Description of array directions with respect to: Columns, Rows, Images
fid.write(np.int32([1, 2, 3]))
# Minimum and maximum density
fid.write(np.float32(np.min(data)))
fid.write(np.float32(np.max(data)))
fid.write(np.float32(np.mean(data)))
# Needed to indicate that the data is little endian for NEW-STYLE MRC image2000 HEADER - IMOD 2.6.20 and above
fid.seek(212, 0)
fid.write(np.int8([68, 65, 0, 0])) # use [17,17,0,0] for big endian
# Write out the data
fid.seek(1024)
if forceWrite:
fid.write(np.ascontiguousarray(data)) # Change to C ordering array for writing to disk
else:
fid.write(data) # must be C-contiguous
# Close the file
[docs]def appendData(filename, data):
"""Append a binary set of data to the end of a MRC file. This should only be used in conjunction with
writeHeader() above.
Parameters
----------
filename : str
Name of the MRC file with pre-initiated header and some data already written.
data : ndarray
Data to append to the file.
"""
with open(filename, 'ab') as fid:
# Write out the data
fid.seek(0, 2) # seek to the end of the file
fid.write(data) # Change to C ordering array for writing to disk
[docs]def emd2mrc(filename, dsetPath):
"""Convert EMD data set into MRC data set. The final data type is float32 for convenience.
Parameters
----------
filename : str
The name of the EMD file
dsetPath : str
The HDF5 path to the top group holding the data. ex. '/data/raw/'
"""
import h5py
with h5py.File(filename, 'r') as f1:
# Get the pixel sizes and convert to Ang
dimsPath = dsetPath + '/dim'
print('Warning: Assuming EMD dim vectors are in nanometer')
print('dim vector names/units are:')
for ii in range(1, 4):
print('name, units = {}, {}'.format(f1[dimsPath + str(ii)].attrs['name'],
f1[dimsPath + str(ii)].attrs['units']))
pixelSizeX = (f1[dsetPath + '/dim2'][1] - f1[dsetPath + '/dim2'][0]) * 10 # change nanometers to Ang
pixelSizeY = (f1[dsetPath + '/dim3'][1] - f1[dsetPath + '/dim3'][0]) * 10 # change nanometers to Ang
filenameOut = filename.split('.emd')[
0] + '.mrc' # use the first part of the file as the prefix removing the .emd on the end
print('Warning: Converting to float32 before writing to disk')
mrcWriter(filenameOut, np.float32(f1[dsetPath + '/data']),
(1, pixelSizeY, pixelSizeX))
print('Finished writing to: {}'.format(filenameOut))