Sometimes, for whatever reason, it is necessary to write or read NeXus files without using the routines provided by the NAPI: The NeXus Application Programming Interface. Each example in this section is written to support just one of the low-level file formats supported by NeXus (HDF4, HDF5, or XML).
C-language code examples are provided for
writing and reading NeXus-compliant files
using the native HDF5 interfaces. These examples are derived from the simple
NAPI examples for
writing
and
reading
given in the
Introduction chapter.
Compare these code examples with Example NeXus programs using NAPI.
Example 4.7. Writing a simple NeXus file using native HDF5 commands
/**
* This is an example of how to write a valid NeXus file
* using the HDF-5 API alone. The structure which is
* going to be created is:
*
* scan:NXentry
* data:NXdata
* counts[]
* @signal=1
* two_theta[]
* @units=degrees
*
* WARNING: each of the HDF functions below needs to be
* wrapped into something like:
*
* if((hdfid = H5function(...)) < 0){
* handle error gracefully
* }
* I left the error checking out in order to keep the
* code clearer
*
* This also installs a link from /scan/data/two_theta to /scan/hugo
*
* Mark Koennecke, October 2011
*/
#include <hdf5.h>
#include <stdlib.h>
#include <string.h>
#define LENGTH 400
/*
 * Attach a scalar, fixed-length string attribute called name with the
 * content value to the HDF-5 object loc. The terminating '\0' of value
 * is not stored: the HDF-5 string size is set to strlen(value), so
 * value must not be empty (H5Tset_size() needs a positive size).
 * All hids created in here are closed again before returning.
 */
static void write_string_attribute(hid_t loc, const char *name,
                                   const char *value)
{
  hid_t atts, atttype, attid;

  atts = H5Screate(H5S_SCALAR);
  atttype = H5Tcopy(H5T_C_S1);
  H5Tset_size(atttype, strlen(value));
  attid = H5Acreate2(loc, name, atttype, atts, H5P_DEFAULT, H5P_DEFAULT);
  H5Awrite(attid, atttype, value);
  H5Aclose(attid);
  H5Tclose(atttype);
  H5Sclose(atts);
}

/*
 * Write NXfile.h5 with the structure
 *
 *   scan:NXentry
 *     data:NXdata
 *       counts[]     @signal=1
 *       two_theta[]  @units=degrees, @target=/scan/data/two_theta
 *     hugo --> hard link to /scan/data/two_theta
 *
 * The versioned HDF-5 calls (H5Gcreate2, H5Acreate2, H5Dcreate2,
 * H5Lcreate_hard) are used on purpose: they have the same signature
 * whatever API version the HDF-5 library maps the unversioned macros to.
 *
 * Error checking of the HDF-5 calls is still left out to keep the
 * example short. Returns 0.
 */
int main(int argc, char *argv[])
{
  float two_theta[LENGTH];
  int counts[LENGTH], i, rank, signal;
  /* HDF-5 handles */
  hid_t fid, fapl, entryid, gid, atts, attid;
  hid_t dataspace, dataprop, dataid;
  hsize_t dim[1], maxdim[1];

  (void)argc;
  (void)argv;

  /*
   * create some data: nothing NeXus or HDF-5 specific.
   * rand() is the generator RAND_MAX belongs to, so the quotient
   * stays within 0..1 on every platform.
   */
  for(i = 0; i < LENGTH; i++){
    two_theta[i] = 10. + .1*i;
    counts[i] = (int)(1000 * ((float)rand()/(float)RAND_MAX));
  }
  dim[0] = LENGTH;
  maxdim[0] = LENGTH;
  rank = 1;

  /*
   * open the file. The file attribute forces normal file
   * closing behaviour down HDF-5's throat
   */
  fapl = H5Pcreate(H5P_FILE_ACCESS);
  H5Pset_fclose_degree(fapl, H5F_CLOSE_STRONG);
  fid = H5Fcreate("NXfile.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
  H5Pclose(fapl);

  /*
   * create scan:NXentry and data:NXdata, each tagged with its
   * NX_class attribute
   */
  entryid = H5Gcreate2(fid, "scan", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  write_string_attribute(entryid, "NX_class", "NXentry");

  gid = H5Gcreate2(fid, "/scan/data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  write_string_attribute(gid, "NX_class", "NXdata");

  /*
   * store the counts dataset
   */
  dataspace = H5Screate_simple(rank, dim, maxdim);
  dataprop = H5Pcreate(H5P_DATASET_CREATE);
  dataid = H5Dcreate2(gid, "counts", H5T_NATIVE_INT, dataspace,
                      H5P_DEFAULT, dataprop, H5P_DEFAULT);
  H5Dwrite(dataid, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, counts);
  H5Sclose(dataspace);
  H5Pclose(dataprop);

  /*
   * set the signal=1 attribute. The memory type handed to H5Awrite()
   * must describe the int variable exactly, so the native int type is
   * used unmodified.
   */
  signal = 1;
  atts = H5Screate(H5S_SCALAR);
  attid = H5Acreate2(dataid, "signal", H5T_NATIVE_INT, atts,
                     H5P_DEFAULT, H5P_DEFAULT);
  H5Awrite(attid, H5T_NATIVE_INT, &signal);
  H5Sclose(atts);
  H5Aclose(attid);
  H5Dclose(dataid);

  /*
   * store the two_theta dataset
   */
  dataspace = H5Screate_simple(rank, dim, maxdim);
  dataprop = H5Pcreate(H5P_DATASET_CREATE);
  dataid = H5Dcreate2(gid, "two_theta", H5T_NATIVE_FLOAT, dataspace,
                      H5P_DEFAULT, dataprop, H5P_DEFAULT);
  H5Dwrite(dataid, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT,
           two_theta);
  H5Sclose(dataspace);
  H5Pclose(dataprop);

  /*
   * set the units attribute and the target attribute for linking
   */
  write_string_attribute(dataid, "units", "degrees");
  write_string_attribute(dataid, "target", "/scan/data/two_theta");
  H5Dclose(dataid);

  /*
   * make a link in /scan to /scan/data/two_theta, thereby
   * renaming two_theta to hugo
   */
  H5Lcreate_hard(fid, "/scan/data/two_theta", fid, "/scan/hugo",
                 H5P_DEFAULT, H5P_DEFAULT);

  /*
   * close the groups and the file
   */
  H5Gclose(gid);
  H5Gclose(entryid);
  H5Fclose(fid);
  return 0;
}
Example 4.8. Reading a simple NeXus file using native HDF5 commands
/**
* Reading example for reading NeXus files with plain
* HDF-5 API calls. This reads out counts and two_theta
* out of the file generated by nxh5write.
*
* WARNING: I left out all error checking in this example.
* In production code you have to take care of those errors
*
* Mark Koennecke, October 2011
*/
#include <hdf5.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
float *two_theta = NULL;
int *counts = NULL, rank, i;
hid_t fid, dataid, fapl;
hsize_t *dim = NULL;
hid_t datatype, dataspace, memdataspace;
/*
* Open file, thereby enforcing proper file close
* semantics
*/
fapl = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fclose_degree(fapl,H5F_CLOSE_STRONG);
fid = H5Fopen("NXfile.h5", H5F_ACC_RDONLY,fapl);
H5Pclose(fapl);
/*
* open and read the counts dataset
*/
dataid = H5Dopen(fid,"/scan/data/counts");
dataspace = H5Dget_space(dataid);
rank = H5Sget_simple_extent_ndims(dataspace);
dim = malloc(rank*sizeof(hsize_t));
H5Sget_simple_extent_dims(dataspace, dim, NULL);
counts = malloc(dim[0]*sizeof(int));
memdataspace = H5Tcopy(H5T_NATIVE_INT32);
H5Dread(dataid,memdataspace,H5S_ALL, H5S_ALL,H5P_DEFAULT, counts);
H5Dclose(dataid);
H5Sclose(dataspace);
H5Tclose(memdataspace);
/*
* open and read the two_theta data set
*/
dataid = H5Dopen(fid,"/scan/data/two_theta");
dataspace = H5Dget_space(dataid);
rank = H5Sget_simple_extent_ndims(dataspace);
dim = malloc(rank*sizeof(hsize_t));
H5Sget_simple_extent_dims(dataspace, dim, NULL);
two_theta = malloc(dim[0]*sizeof(float));
memdataspace = H5Tcopy(H5T_NATIVE_FLOAT);
H5Dread(dataid,memdataspace,H5S_ALL, H5S_ALL,H5P_DEFAULT, two_theta);
H5Dclose(dataid);
H5Sclose(dataspace);
H5Tclose(memdataspace);
H5Fclose(fid);
for(i = 0; i < dim[0]; i++){
printf("%8.2f %10d\n", two_theta[i], counts[i]);
}
}
One way to gain a quick familiarity with NeXus is to start working with some data. For at least the
first few examples in this section, we have a simple two-column set of 1-D data, collected as part of a
series of alignment scans by the APS USAXS instrument during the time it was stationed at
beam line 32ID. We will show how to write this
data using the Python language and the h5py package[42]
(using h5py calls directly rather than using the NeXus NAPI). The
actual data to be written was extracted (elsewhere) from a spec[43] data file and read as a text block from a file by the Python source code.
Our examples will start with the simplest case and add only mild complexity with each new case
since these examples are meant for those who are unfamiliar with NeXus.
The data shown in Example 4.9, “two-column data for our mr_scan” will be written to the NeXus HDF5 file
using the only two required NeXus objects NXentry and NXdata in the first example
and then minor variations on this structure in the next two examples. The
data model is identical to the one in the Introduction chapter
(Introduction to Volume I), except that the names will be different, as shown below:
Table 4.1. NeXus Data Model
| our h5py example | from Introduction | |
|---|---|---|
/entry:NXentry
/mr_scan:NXdata
/mr : float64[31]
/I00 : int32[31]
|
|
Example 4.9. two-column data for our mr_scan
17.92608 1037 17.92591 1318 17.92575 1704 17.92558 2857 17.92541 4516 17.92525 9998 17.92508 23819 17.92491 31662 17.92475 40458 17.92458 49087 17.92441 56514 17.92425 63499 17.92408 66802 17.92391 66863 17.92375 66599 17.92358 66206 17.92341 65747 17.92325 65250 17.92308 64129 17.92291 63044 17.92275 60796 17.92258 56795 17.92241 51550 17.92225 43710 17.92208 29315 17.92191 19782 17.92175 12992 17.92158 6622 17.92141 4198 17.92125 2248 17.92108 1321
In this example, the 1-D scan data will be written into the simplest
possible NeXus HDF5 data file, containing only the required NeXus components.
NeXus requires at least one NXentry group at the root level of
an HDF5 file. The NXentry group contains "all the data and associated
information that comprise a single measurement."
NeXus also requires that each NXentry group must contain at least
one NXdata group. NXdata is used to describe the
plottable data in the NXentry group. The simplest place to store
data in a NeXus file is directly in the NXdata group,
as shown in the next figure.
In the above figure, the data file (writer_1_3_h5py.hdf5) contains
a hierarchy of items, starting with an NXentry named entry.
(The full HDF5 path reference, /entry in this case, is shown to the right of each
component in the data structure.) The next h5py code
example will show how to build an HDF5 data file with this structure.
Starting with the numerical data described above,
the only information
written to the file is the absolute minimum information NeXus requires.
In this example, you can see how the HDF5 file is created, how
Data Groups and datasets (Data Fields)
are created, and how Data Attributes are assigned.
Note particularly the NX_class attribute on each HDF5 group that
describes which of the NeXus Base Classes is being used.
When the next Python program (writer_1_3_h5py.py) is run from the
command line (and there are no problems), the writer_1_3_h5py.hdf5
file is generated.
#!/usr/bin/env python
'''
Writes the simplest NeXus HDF5 file using h5py
according to the example from Figure 1.3
in the Introduction chapter
'''
import h5py
import numpy
INPUT_FILE = 'input.dat'
HDF5_FILE = 'writer_1_3_h5py.hdf5'

#---------------------------

# Column 1 of the input holds the two_theta angles, column 2 the counts.
# numpy.loadtxt() returns every column as floating point, so the counts
# are converted to 32-bit integers before they are stored.
tthData, countsData = numpy.loadtxt(INPUT_FILE).T
countsData = numpy.asarray(countsData, 'int32')

f = h5py.File(HDF5_FILE, "w")   # create the HDF5 NeXus file
try:
    # only the attributes NeXus needs are written:
    # NX_class on each group, plus units/signal/axes on the datasets
    nxentry = f.create_group('Scan')
    nxentry.attrs["NX_class"] = 'NXentry'

    nxdata = nxentry.create_group('data')
    nxdata.attrs["NX_class"] = 'NXdata'

    tth = nxdata.create_dataset("two_theta", data=tthData)
    tth.attrs['units'] = "degrees"

    counts = nxdata.create_dataset("counts", data=countsData)
    counts.attrs['units'] = "counts"
    counts.attrs['signal'] = "1"
    counts.attrs['axes'] = "two_theta"
finally:
    f.close()   # be CERTAIN to close the file, even after an error
We wish to make things a bit simpler for ourselves when creating the common
structures we use in our data files. To help, we gather together some of the
common concepts such as create a file,
create a NeXus group,
create a dataset and start to build a helper library.
(See mylib support module for more details.)
Here, we call it my_lib. Applying it to the simple example above, our
code only becomes a couple lines shorter! (Let's hope the library starts to help in larger or
more complicated projects.) Here's the revision that replaces direct calls to numpy
and h5py with calls to our library. It generates the file
writer_1_3.hdf5.
#!/usr/bin/env python
'''
Writes the simplest NeXus HDF5 file using
a simple helper library with h5py and numpy calls
according to the example from Figure 1.3
in the Introduction chapter
'''
import my_lib
INPUT_FILE = 'input.dat'
HDF5_FILE = 'writer_1_3.hdf5'

#---------------------------

tthData, countsData = my_lib.get_2column_data(INPUT_FILE)

f = my_lib.makeFile(HDF5_FILE)   # create the HDF5 NeXus file
try:
    # minimal NeXus structure: /Scan:NXentry/data:NXdata
    # my_lib.makeGroup() sets the NX_class attribute for us and
    # my_lib.makeDataset() stores the keyword arguments as attributes
    nxentry = my_lib.makeGroup(f, 'Scan', 'NXentry')
    nxdata = my_lib.makeGroup(nxentry, 'data', 'NXdata')

    my_lib.makeDataset(nxdata, "two_theta", tthData, units='degrees')
    my_lib.makeDataset(nxdata, "counts", countsData,
                       units='counts', signal='1', axes='two_theta')
finally:
    f.close()   # be CERTAIN to close the file, even after an error
One of the tools provided with the HDF5 support libraries is
the h5dump command, a command-line tool to print out the
contents of an HDF5 data file. With no better tool in place (the
output is verbose), this is a good tool to investigate what has been
written to the HDF5 file. View this output from the command line
using h5dump writer_1_3.hdf5. Compare the data contents with
the numbers shown above. Note that the various HDF5 data types have all been
decided by the h5py support package.
The only difference between this file and one written using the NAPI is that the NAPI file will have some additional, optional attributes set at the root level of the file that tells the original file name, time it was written, and some version information about the software involved.
HDF5 "writer_1_3.hdf5" {
GROUP "/" {
GROUP "Scan" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXentry"
}
}
GROUP "data" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXdata"
}
}
DATASET "counts" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 31 ) / ( 31 ) }
DATA {
(0): 1037, 1318, 1704, 2857, 4516, 9998, 23819, 31662, 40458,
(9): 49087, 56514, 63499, 66802, 66863, 66599, 66206, 65747,
(17): 65250, 64129, 63044, 60796, 56795, 51550, 43710, 29315,
(25): 19782, 12992, 6622, 4198, 2248, 1321
}
ATTRIBUTE "units" {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "counts"
}
}
ATTRIBUTE "signal" {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "1"
}
}
ATTRIBUTE "axes" {
DATATYPE H5T_STRING {
STRSIZE 9;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "two_theta"
}
}
}
DATASET "two_theta" {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 31 ) / ( 31 ) }
DATA {
(0): 17.9261, 17.9259, 17.9258, 17.9256, 17.9254, 17.9252,
(6): 17.9251, 17.9249, 17.9247, 17.9246, 17.9244, 17.9243,
(12): 17.9241, 17.9239, 17.9237, 17.9236, 17.9234, 17.9232,
(18): 17.9231, 17.9229, 17.9228, 17.9226, 17.9224, 17.9222,
(24): 17.9221, 17.9219, 17.9217, 17.9216, 17.9214, 17.9213,
(30): 17.9211
}
ATTRIBUTE "units" {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "degrees"
}
}
}
}
}
}
}
Since the output of h5dump is verbose, a tool
(see h5toText support module)
was created to
print out the structure of HDF5 data files. This tool provides a simplified view
of the NeXus file. It is run with a command like this:
python h5toText.py writer_1_3.hdf5. Here is the output:
writer_1_3.hdf5:NeXus data file
Scan:NXentry
@NX_class = NXentry
data:NXdata
@NX_class = NXdata
counts:NX_INT32[31] = __array
@units = counts
@signal = 1
@axes = two_theta
__array = [1037, 1318, 1704, '...', 1321]
two_theta:NX_FLOAT64[31] = __array
@units = degrees
__array = [17.926079999999999, 17.925909999999998, 17.925750000000001, '...', 17.92108]
As the data files in these examples become more complex, you will appreciate
the information density provided by the h5toText.py tool.
Building on the previous example, we wish to identify our measured data with
the detector on the instrument where it was generated.
In this hypothetical case, since the detector was positioned at some
angle two_theta, we choose to store both datasets,
two_theta and counts, in a NeXus group.
One appropriate NeXus group is NXdetector.
This group is placed in a NXinstrument group
which is placed in a NXentry group.
Still, NeXus requires a NXdata group.
Rather than duplicate the same data already placed in the detector group,
we choose to link to those datasets from the NXdata group.
(Compare the next figure with simple example showing data linking in the
NeXus Design chapter of the NeXus User Manual.)
The NeXus Design chapter provides a figure
(simple example showing data linking) with a small variation from our
previous example, placing the measured data
within the /entry/instrument/detector group.
Links are made from that data to the /entry/data group.
The Python code to build an HDF5 data file with that structure (using numerical data from the previous example) is shown below.
#!/usr/bin/env python
'''
Writes a simple NeXus HDF5 file using h5py with links
according to the example from Figure 2.1 in the Design chapter
'''
import my_lib
INPUT_FILE = 'input.dat'
HDF5_FILE = 'writer_2_1.hdf5'

#---------------------------

tthData, countsData = my_lib.get_2column_data(INPUT_FILE)

f = my_lib.makeFile(HDF5_FILE)   # create the HDF5 NeXus file
try:
    # the measured data lives in /entry/instrument/detector
    nxentry = my_lib.makeGroup(f, 'entry', 'NXentry')
    nxinstrument = my_lib.makeGroup(nxentry, 'instrument', 'NXinstrument')
    nxdetector = my_lib.makeGroup(nxinstrument, 'detector', 'NXdetector')

    tth = my_lib.makeDataset(nxdetector, "two_theta", tthData, units='degrees')
    counts = my_lib.makeDataset(nxdetector, "counts", countsData,
                                units='counts', signal='1', axes='two_theta')

    # /entry/data only holds links to the two detector datasets
    nxdata = my_lib.makeGroup(nxentry, 'data', 'NXdata')
    my_lib.makeLink(nxdetector, tth, nxdata.name+'/two_theta')
    my_lib.makeLink(nxdetector, counts, nxdata.name+'/counts')
finally:
    f.close()   # be CERTAIN to close the file, even after an error
It is interesting to compare the output of the h5dump
of the data file writer_2_1.hdf5 with our Python instructions.
HDF5 "writer_2_1.hdf5" {
GROUP "/" {
GROUP "entry" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXentry"
}
}
GROUP "data" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXdata"
}
}
DATASET "counts" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 31 ) / ( 31 ) }
DATA {
(0): 1037, 1318, 1704, 2857, 4516, 9998, 23819, 31662, 40458,
(9): 49087, 56514, 63499, 66802, 66863, 66599, 66206, 65747,
(17): 65250, 64129, 63044, 60796, 56795, 51550, 43710, 29315,
(25): 19782, 12992, 6622, 4198, 2248, 1321
}
ATTRIBUTE "units" {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "counts"
}
}
ATTRIBUTE "signal" {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "1"
}
}
ATTRIBUTE "axes" {
DATATYPE H5T_STRING {
STRSIZE 9;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "two_theta"
}
}
ATTRIBUTE "target" {
DATATYPE H5T_STRING {
STRSIZE 33;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "/entry/instrument/detector/counts"
}
}
}
DATASET "two_theta" {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 31 ) / ( 31 ) }
DATA {
(0): 17.9261, 17.9259, 17.9258, 17.9256, 17.9254, 17.9252,
(6): 17.9251, 17.9249, 17.9247, 17.9246, 17.9244, 17.9243,
(12): 17.9241, 17.9239, 17.9237, 17.9236, 17.9234, 17.9232,
(18): 17.9231, 17.9229, 17.9228, 17.9226, 17.9224, 17.9222,
(24): 17.9221, 17.9219, 17.9217, 17.9216, 17.9214, 17.9213,
(30): 17.9211
}
ATTRIBUTE "units" {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "degrees"
}
}
ATTRIBUTE "target" {
DATATYPE H5T_STRING {
STRSIZE 36;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "/entry/instrument/detector/two_theta"
}
}
}
}
GROUP "instrument" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 12;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXinstrument"
}
}
GROUP "detector" {
ATTRIBUTE "NX_class" {
DATATYPE H5T_STRING {
STRSIZE 10;
STRPAD H5T_STR_NULLPAD;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
DATA {
(0): "NXdetector"
}
}
DATASET "counts" {
HARDLINK "/entry/data/counts"
}
DATASET "two_theta" {
HARDLINK "/entry/data/two_theta"
}
}
}
}
}
}
Look carefully! It appears from the output of
h5dump that the actual data for two_theta
and counts has moved into
the NXdata group at HDF5 path /entry/data! But we stored
that data in the NXdetector group at /entry/instrument/detector.
This is normal for h5dump output.
A bit of explanation is necessary at this point.
The data is not stored in either HDF5 group directly. Instead, HDF5
creates a DATA storage element in the file and posts a reference
to that DATA storage element as needed.
An HDF5 hard link
requests another reference to that same DATA storage element.
The h5dump tool describes in full that DATA storage element
the first time (alphabetically) it is called. In our case, that is within the
NXdata group. The next time it is called, within the
NXdetector group, h5dump reports that a hard link
has been made and shows the HDF5 path to the description.
NeXus recognizes this behavior of the HDF5 library and adds an additional structure
when building hard links, the target attribute,
to preserve the original location of the data. Not that it actually matters.
The h5toText.py tool knows about the additional NeXus
target attribute and shows the data to appear in its original
location, in the NXdetector group.
writer_2_1.hdf5:NeXus data file
entry:NXentry
@NX_class = NXentry
data:NXdata
@NX_class = NXdata
counts --> /entry/instrument/detector/counts
two_theta --> /entry/instrument/detector/two_theta
instrument:NXinstrument
@NX_class = NXinstrument
detector:NXdetector
@NX_class = NXdetector
counts:NX_INT32[31] = __array
@units = counts
@signal = 1
@axes = two_theta
@target = /entry/instrument/detector/counts
__array = [1037, 1318, 1704, '...', 1321]
two_theta:NX_FLOAT64[31] = __array
@units = degrees
@target = /entry/instrument/detector/two_theta
__array = [17.926079999999999, 17.925909999999998, 17.925750000000001, '...', 17.92108]
In the main code section of BasicWriter.py, a current time stamp
is written in the format of ISO 8601.
For simplicity of this code example, we use a text string for the time, rather than
computing it directly from Python support library calls. It is easier this way to
see the exact type of string formatting for the time. When using the Python
datetime package, one way to write the time stamp is:
timestamp = "T".join( str( datetime.datetime.now() ).split() )
The data (mr is similar to "two_theta" and
I00 is similar to "counts") is collated into two Python lists. We use our my_lib
support to read the file and parse the two-column format.
The new HDF5 file is opened (and created if not already existing) for writing,
setting common NeXus attributes in the same command from our support library.
Proper HDF5+NeXus groups are created for /entry:NXentry/mr_scan:NXdata.
Since we are not using the NAPI, our
support library must create and set the NX_class attribute on each group.
We want to create the desired structure of
/entry:NXentry/mr_scan:NXdata/. First, our support library calls nxentry =
f.create_group("entry") to create the NXentry group called
entry at the root level. Then, it calls nxdata =
nxentry.create_group("mr_scan") to create the NXdata group called
mr_scan as a child of the NXentry group.
Next, we create a dataset called title to hold a title string that can
appear on the default plot.
Next, we create datasets for mr and I00 using our support library.
The data type of each, as represented in numpy, will be recognized by
h5py and automatically converted to the proper HDF5 type in the file.
A Python dictionary of attributes is given, specifying the engineering units and other
values needed by NeXus to provide a default plot of this data. By setting signal="1"
as an attribute on I00, NeXus recognizes I00 as the default
y axis for the plot. The axes="mr" connects the dataset
to be used as the x axis.
Finally, we must remember to call f.close() or we might
corrupt the file when the program quits.
Example 4.10. BasicWriter.py: Write a NeXus HDF5 file using Python with h5py
#!/usr/bin/env python
'''Writes a NeXus HDF5 file using h5py and numpy'''
import h5py # HDF5 support
import numpy
import my_lib # uses h5py
# single-argument print() gives the same output under Python 2 and 3
print("Write a NeXus HDF5 file")
fileName = "prj_test.nexus.hdf5"
# ISO 8601 time stamp, given as text so the format is easy to see
timestamp = "2010-10-18T17:17:04-0500"

# load data from two column format
data = numpy.loadtxt('input.dat').T
mr_arr = data[0]
i00_arr = numpy.asarray(data[1],'int32')

# create the HDF5 NeXus file;
# the keyword arguments become attributes at the root of the file
f = my_lib.makeFile(fileName, file_name=fileName,
    file_time=timestamp,
    instrument="APS USAXS at 32ID-B",
    creator="$Id: BasicWriter.py 998 2011-11-02 20:59:59Z Pete Jemian $",
    NeXus_version="4.3.0",
    HDF5_Version=h5py.version.hdf5_version,
    h5py_version=h5py.version.version)
try:
    nxentry = my_lib.makeGroup(f, "entry", "NXentry")
    my_lib.makeDataset(nxentry, 'title', data='1-D scan of I00 v. mr')

    nxdata = my_lib.makeGroup(nxentry, "mr_scan", "NXdata")

    my_lib.makeDataset(nxdata, "mr", mr_arr, units='degrees', long_name='USAXS mr (degrees)')

    my_lib.makeDataset(nxdata, "I00", i00_arr, units='counts',
        signal='1',      # Y axis of default plot
        axes='mr',       # name "mr" as X axis
        long_name='USAXS I00 (counts)')
finally:
    f.close()   # be CERTAIN to close the file, even after an error

print("wrote file: %s" % fileName)
The file reader, BasicReader.py,
is very simple since the bulk of the work is done by h5py.
Our code opens the HDF5 we wrote above,
prints the HDF5 attributes from the file,
reads the two datasets,
and then prints them out as columns.
As simple as that.
Of course, real code might add some error handling and
extract other useful information from the file.
See that we identified each of the two datasets using HDF5 absolute path references
(just using the group and dataset names). Also, while coding this example, we were reminded
that HDF5 is sensitive to upper or lowercase. That is, I00 is not the same as
i00.
Example 4.11. BasicReader.py: Read a NeXus HDF5 file using Python with h5py
#!/usr/bin/env python
'''Reads NeXus HDF5 files using h5py and prints the contents'''
import h5py # HDF5 support
fileName = "prj_test.nexus.hdf5"
f = h5py.File(fileName, "r")
try:
    # attributes stored at the root of the file
    for item in f.attrs.keys():
        print("%s: %s" % (item, f.attrs[item]))

    # [...] reads each dataset once into memory instead of going
    # back to the file for every single element
    mr = f['/entry/mr_scan/mr'][...]
    i00 = f['/entry/mr_scan/I00'][...]

    print("%s\t%s\t%s" % ("#", "mr", "I00"))
    for i in range(len(mr)):
        print("%d\t%g\t%d" % (i, mr[i], i00[i]))
finally:
    f.close()   # close the file, even after an error
Output from BasicReader.py is shown in Example 4.12, “Output from BasicReader.py”.
Example 4.12. Output from BasicReader.py
file_name: prj_test.nexus.hdf5 file_time: 2010-10-18T17:17:04-0500 creator: $Id: BasicWriter.py 647 2010-10-19 22:34:01Z Pete Jemian $ HDF5_Version: 1.8.5 NeXus_version: 4.3.0 h5py_version: 1.2.1 instrument: APS USAXS at 32ID-B # mr I00 0 17.9261 1037 1 17.9259 1318 2 17.9258 1704 3 17.9256 2857 4 17.9254 4516 5 17.9252 9998 6 17.9251 23819 7 17.9249 31662 8 17.9247 40458 9 17.9246 49087 10 17.9244 56514 11 17.9243 63499 12 17.9241 66802 13 17.9239 66863 14 17.9237 66599 15 17.9236 66206 16 17.9234 65747 17 17.9232 65250 18 17.9231 64129 19 17.9229 63044 20 17.9228 60796 21 17.9226 56795 22 17.9224 51550 23 17.9222 43710 24 17.9221 29315 25 17.9219 19782 26 17.9217 12992 27 17.9216 6622 28 17.9214 4198 29 17.9213 2248 30 17.9211 1321
Now we have an HDF5 file that contains our data. What makes this different from a NeXus data file? A NeXus file has a specific arrangement of groups and datasets in an HDF5 file.
To test that our HDF5 file conforms to the NeXus standard,
we use the NXvalidate (java)
program. Referring to the next figure,
we compare our HDF5 file with the rules for
generic[44] data files
(all.nxdl.xml). The only items that have
been flagged are the "non-standard field names"
mr and
I00. Neither of these two names is
specifically named in the NeXus NXDL definition for
the NXdata base class. As we'll see shortly,
this is not a problem.
Note that NXvalidate shows
only the first data field for mr and
I00.
Now that we are certain our file conforms to the NeXus
standard, let's plot it using the NeXpy[45]
client tool. To help label the plot, we added the
long_name attributes to each of our datasets.
We also added metadata to the root level of our HDF5 file
similar to that written by the NAPI. It seemed to be a useful addition.
Compare this with
standard plot of our mr_scan data
and note that the horizontal axis of this plot is mirrored from that above.
This is because the data is stored in the file in descending
mr order and NeXpy has plotted
it that way by default.
Two additional Python modules were used to describe these h5py examples.
The source code for each is given here. The first is a library we wrote that helps us
create standard NeXus components using h5py. The second is a tool that helps
us inspect the content and structure of HDF5 files.
The examples in this section make use of
a small helper library that calls h5py to create the
various NeXus data components of
Data Groups,
Data Fields,
Data Attributes, and
Links.
In a smaller sense, this subroutine library (my_lib) fills the role of the NAPI for writing
the data using h5py.
#!/usr/bin/env python
'''
my_lib Library of routines to support NeXus HDF5 files using h5py
'''
import h5py # HDF5 support
import numpy # in this case, provides data structures
def makeFile(filename, **attr):
    """
    Create a new, empty NeXus HDF5 file with h5py and leave it open.

    The file is opened in "w" mode. Every keyword argument given in
    addition to the file name is stored as an attribute on the root
    of the file.

    :param str filename: valid file name
    :param attr: optional keywords of attributes
    :return: h5py file object
    """
    nxfile = h5py.File(filename, "w")
    # **attr always arrives as a plain dictionary (empty if no keywords)
    for key, value in attr.items():
        nxfile.attrs[key] = value
    return nxfile
def makeGroup(parent, name, nxclass):
    """
    Create a group below parent and tag it with its NeXus class.

    The class name is stored in the NX_class attribute of the new group.

    :param obj parent: parent group
    :param str name: valid NeXus group name
    :param str nxclass: valid NeXus class name
    :return: h5py group object
    """
    nxgroup = parent.create_group(name)
    nxgroup.attrs["NX_class"] = nxclass
    return nxgroup
def makeDataset(parent, name, data = None, **attr):
    '''
    create and write data to a dataset in the HDF5 file hierarchy

    Any named parameters in addition to data will be saved as
    attributes of the new dataset.

    :param obj parent: parent group
    :param str name: valid NeXus dataset name
    :param obj data: the data to be saved
    :param attr: optional keywords of attributes
    :return: the new dataset object, as returned by parent.create_dataset()
    '''
    # "is None" on purpose: "data == None" is evaluated element by
    # element when data is a numpy array and cannot be used as a condition
    if data is None:
        obj = parent.create_dataset(name)
    else:
        obj = parent.create_dataset(name, data=data)
    add_attributes(obj, attr)
    return obj
def makeLink(parent, sourceObject, targetName):
    """
    Make sourceObject available under a second HDF5 path.

    Two things happen: the source object gets a "target" attribute
    holding its own path (unless it already has one), and an HDF5
    hard link named targetName is created that points to it.

    :param obj parent: parent group of source
    :param obj sourceObject: HDF5 object
    :param str targetName: HDF5 node path string, such as /entry/data/data
    """
    source_path = sourceObject.name
    if 'target' not in sourceObject.attrs:
        # this attribute is the NeXus part of the link, NOT an HDF5 link!
        sourceObject.attrs["target"] = str(source_path)
    # and this is the HDF5 part
    parent._id.link(source_path, targetName, h5py.h5g.LINK_HARD)
def add_attributes(parent, attr):
    """
    add attributes to an h5py data item

    Nothing happens when attr is empty, None, or not a dictionary.
    Dictionary subclasses (such as collections.OrderedDict) are accepted.

    :param obj parent: h5py parent object
    :param dict attr: dictionary of attributes
    """
    if not attr or not isinstance(attr, dict):
        return
    # attr is a dictionary of attributes
    for k, v in attr.items():
        parent.attrs[k] = v
def get_2column_data(fileName):
    '''
    Read two-column text data with numpy.loadtxt().

    :param fileName: name of the file to read
    :return: tuple (first column as loaded by numpy,
             second column converted to int32)
    '''
    columns = numpy.transpose(numpy.loadtxt(fileName))
    return columns[0], numpy.asarray(columns[1], 'int32')
The module h5toText reads an HDF5 data file and prints out the
structure of the groups, datasets, attributes, and links in that file.
There is a command-line option to print out more or less of the data
in the dataset arrays.
#!/usr/bin/env python
'''
Print the structure of an HDF5 file to stdout
$Id: h5toText.py 1039 2012-02-08 14:28:34Z Pete Jemian $
'''
########### SVN repository information ###################
# $Date: 2012-02-08 14:28:34 +0000 (Wed, 08 Feb 2012) $
# $Author: Pete Jemian $
# $Revision: 1039 $
# $URL: file:///isis/svn/nexus/definitions/trunk/manual/examples/h5py/h5toText.py $
# $Id: h5toText.py 1039 2012-02-08 14:28:34Z Pete Jemian $
########### SVN repository information ###################
import h5py
import os
import sys
import getopt
class H5toText(object):
    '''
    Print the structure (groups, datasets, attributes, NeXus links)
    of an HDF5 file to stdout.

    Example usage showing default display::

        mc = H5toText(filename)
        mc.array_items_shown = 5
        mc.report()
    '''
    # name of the file to report; stays None when the requested file does not exist
    filename = None
    # file name exactly as it was passed to the constructor
    requested_filename = None
    # result of testIsNeXus() for the file
    isNeXus = False
    # limit on the number of items shown along an array dimension; None means no limit
    array_items_shown = 5

    def __init__(self, filename, makeReport = False):
        '''
        Constructor

        :param str filename: name of the HDF5 file to be examined
        :param bool makeReport: when True, print the report right away
        '''
        self.requested_filename = filename
        if os.path.exists(filename):
            self.filename = filename
            self.isNeXus = self.testIsNeXus()
        if makeReport:
            # report() does nothing when the file was not found
            self.report()

    def report(self):
        ''' reporter: print the structure of the file (nothing if the file was not found) '''
        if self.filename is None:
            return
        f = h5py.File(self.filename, 'r')
        try:
            txt = self.filename
            if self.isNeXus:
                txt += ":NeXus data file"
            self.showGroup(f, txt, indentation = "")
        finally:
            # release the file even when printing fails part way through
            f.close()

    def testIsNeXus(self):
        '''
        test if the selected HDF5 file is a NeXus file

        :return: True if a group at the root of the file has the attribute
            NX_class = "NXentry"; False otherwise, or if the file cannot be read
        '''
        result = False
        try:
            f = h5py.File(self.filename, 'r')
            try:
                for itemname in f:
                    value = f[itemname]
                    if not isinstance(value, h5py.Group):
                        continue
                    if 'NX_class' not in value.attrs:
                        continue
                    v = value.attrs['NX_class']
                    if isinstance(v, bytes):
                        # byte strings are compared as text
                        v = v.decode('utf-8', 'replace')
                    if isinstance(v, type(u"")) and v == u'NXentry':
                        result = True
                        break
            finally:
                f.close()
        except Exception:
            # best effort: a file that cannot be examined is reported as "not NeXus"
            pass
        return result

    def showGroup(self, obj, name, indentation = " "):
        '''
        print the contents of the group

        :param obj obj: h5py File or Group
        :param str name: name to print for this group
        :param str indentation: prefix for every line printed for this group
        '''
        nxclass = ""
        if 'NX_class' in obj.attrs:
            class_attr = obj.attrs['NX_class']
            nxclass = ":" + str(class_attr)
        print(indentation + name + nxclass)
        self.showAttributes(obj, indentation)
        itemnames = sorted(obj)
        # show datasets (and links) first
        for itemname in itemnames:
            value = obj[itemname]
            if not isinstance(value, h5py.Group):
                self.showDataset(value, itemname, indentation = indentation+" ")
        # then show things that look like groups
        for itemname in itemnames:
            value = obj[itemname]
            if isinstance(value, h5py.Group):
                self.showGroup(value, itemname, indentation = indentation+" ")

    def showAttributes(self, obj, indentation = " "):
        '''print any attributes'''
        for name, value in obj.attrs.items():
            print("%s @%s = %s" % (indentation, name, str(value)))

    def showDataset(self, dset, name, indentation = " "):
        '''
        print the contents and structure of a dataset

        In a NeXus file, a dataset whose ``target`` attribute differs from its
        own name is printed as a link (``name --> target``) and nothing else.

        :param obj dset: the dataset
        :param str name: name to print for the dataset
        :param str indentation: prefix for every line printed
        '''
        shape = dset.shape
        if self.isNeXus and "target" in dset.attrs:
            if dset.attrs['target'] != dset.name:
                print("%s%s --> %s" % (indentation, name, dset.attrs['target']))
                return
        txType = self.getType(dset)
        txShape = self.getShape(dset)
        if shape == (1,):
            value = " = %s" % str(dset[0])
            print("%s%s:%s%s%s" % (indentation, name, txType, txShape, value))
            self.showAttributes(dset, indentation)
            return
        print("%s%s:%s%s = __array" % (indentation, name, txType, txShape))
        self.showAttributes(dset, indentation) # show these before __array
        limit = self.array_items_shown
        # None means "no limit" (see decideNumShown), so the array must be shown
        if limit is None or limit > 2:
            value = self.formatArray(dset, indentation + ' ')
            print("%s %s = %s" % (indentation, "__array", value))
        else:
            print("%s %s: %s" % (indentation, "__array", "not shown"))

    def getType(self, obj):
        ''' get the storage (data) type of the dataset, as text '''
        t = str(obj.dtype)
        if t.startswith('|S'):
            # fixed-length string dtype such as "|S44" is shown as char[44]
            t = 'char[%s]' % t[2:]
        if self.isNeXus:
            t = 'NX_' + t.upper()
        return t

    def getShape(self, obj):
        ''' return the shape of the HDF5 dataset as text, "" for shape (1,) '''
        dims = [str(dim) for dim in obj.shape]
        if dims == ['1']:
            return ""
        return "[%s]" % ",".join(dims)

    def formatArray(self, obj, indentation = ' '):
        ''' nicely format an array up to rank=5 '''
        rank = len(obj.shape)
        if rank > 5:
            return "### no arrays for rank > 5 ###"
        if rank in (1, 2, 3, 4, 5):
            return self.formatNdArray(obj, indentation + ' ')
        return ""

    def decideNumShown(self, n):
        '''
        determine how many values to show

        :param int n: length of the dimension
        :return: n itself, or ``array_items_shown - 2`` when n exceeds the limit
        '''
        limit = self.array_items_shown
        if limit is not None and n > limit:
            n = limit - 2
        return n

    def _formatItem(self, obj, index, rank, indentation):
        ''' one entry along the first axis: the raw value for rank 1, else formatted text '''
        if rank == 1:
            return obj[index]
        return self.formatNdArray(obj[index], indentation + ' ')

    def formatNdArray(self, obj, indentation = ' '):
        '''
        return a list of lower-dimension arrays, nicely formatted

        When the first dimension is longer than the display limit, only the
        leading items are shown, followed by "..." and the last item.

        :return: text, or None when the rank is not 1 to 5
        '''
        shape = obj.shape
        rank = len(shape)
        if rank not in (1, 2, 3, 4, 5):
            return None
        n = self.decideNumShown( shape[0] )
        r = [self._formatItem(obj, i, rank, indentation) for i in range(n)]
        if n < shape[0]:
            # skip over most
            r.append("...")
            # get the last one
            r.append(self._formatItem(obj, -1, rank, indentation))
        if rank == 1:
            return str( r )
        s = "[\n" + indentation + ' '
        s += ("\n" + indentation + ' ').join(r)
        s += "\n" + indentation + "]"
        return s
if __name__ == '__main__':
    # Command-line driver: report on each named HDF5 file, or on the
    # built-in list of example files when no arguments are given.
    limit = 5
    filelist = [
        '../Create/example1.hdf5',
        '../Create/example2.hdf5',
        '../Create/example3.hdf5',
        '../Create/example4.hdf5',
        '../../../NeXus/definitions/trunk/manual/examples/h5py/prj_test.nexus.hdf5',
        '../../../NeXus/definitions/exampledata/code/hdf5/dmc01.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/dmc02.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/focus2007n001335.hdf',
        '../../../NeXus/definitions/exampledata/code/hdf5/NXtest.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/sans2009n012333.hdf',
        '../Create/simple5.nxs',
        '../Create/bad.h5',
    ]
    if len(sys.argv) > 1:
        try:
            opts, args = getopt.getopt(sys.argv[1:], "n:")
            for flag, value in opts:
                if flag == "-n":
                    if value.lower() == "none":
                        limit = None
                    else:
                        limit = int(value)
        except (getopt.GetoptError, ValueError):
            # unknown option, missing argument, or a -n value that is not an integer
            print("")
            print("SVN: $Id: h5toText.py 1039 2012-02-08 14:28:34Z Pete Jemian $")
            print(" ".join(["usage: ", sys.argv[0], " [-n ##] HDF5_file_name [another_HDF5_file_name]"]))
            print(" -n ## : limit number of displayed array items to ## (must be 3 or more or 'None')")
            print("")
            # stop here: without valid options there is no file list to process
            sys.exit(2)
        filelist = args
    for item in filelist:
        mc = H5toText(item)
        mc.array_items_shown = limit
        mc.report()
The IPNS LRMECS instrument stored data in NeXus HDF4 data files.
One such example is available from the repository of NeXus data file examples:
http://svn.nexusformat.org/definitions/exampledata/IPNS/LRMECS/lrcs3701.nxs
For this example, we will start with a conversion of that original data file into HDF5 format:
http://svn.nexusformat.org/definitions/exampledata/IPNS/LRMECS/lrcs3701.nx5
This file contains two histograms with 2-D images (148x750 and 148x32) of 32-bit integers.
First, we use the h5dump tool to investigate the header content of the file
(no data).
Here, the output of the command: h5dump -H lrcs3701.nx5 has been edited to
only show the first NXdata group (/Histogram1/data):
Example 4.13. LRMECS lrcs3701 data: h5dump output
HDF5 "C:\Users\Pete\Documents\eclipse\NeXus\definitions\exampledata\IPNS\LRMECS\lrcs3701.nx5" {
GROUP "/Histogram1/data" {
DATASET "data" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 148, 750 ) / ( 148, 750 ) }
}
DATASET "polar_angle" {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 148 ) / ( 148 ) }
}
DATASET "time_of_flight" {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 751 ) / ( 751 ) }
}
DATASET "title" {
DATATYPE H5T_STRING {
STRSIZE 44;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
}
For many, the simplest way to view the data content of an HDF5 file is to use the
HDFview[46]
program from The HDF Group. After starting HDFview, the data file
may be loaded by dragging it into the main HDF window.
On opening up to the first NXdata group /Histogram1/data
(as above), and then double-clicking the dataset called: data, we get
our first view of the data.
The data may be represented as an image by accessing the Open As menu from HDFview (on Windows, right-click the dataset called data and select the Open As item; consult the HDFview documentation for instructions on other platforms). Be sure to select the Image radio button, and then (accepting everything else as a default) press the OK button.
In this image, dark represents low intensity while white represents high intensity.
Another way to visualize this data is to use a commercial package for scientific
data visualization and analysis. One such package is IgorPro.[47]
IgorPro provides a browser for HDF5 files that can open our NeXus HDF5 and display the image.
Follow the instructions from WaveMetrics to install the HDF5 Browser
package: http://www.wavemetrics.com/products/igorpro/dataaccess/hdf5.htm
You may not have to do this step if you have already installed the HDF5 Browser.
IgorPro will tell you if it is not installed properly.
To install the HDF5 Browser, first start IgorPro.
Next, select from the menus and submenus:
Data; Load Waves; Packages; Install HDF5 Package as shown in the next figure.
IgorPro may direct you to perform more activities before you progress from this step.
Next, open the HDF5 Browser by selecting
from the menus and submenus:
Data; Load Waves; New HDF5 Browser as shown in the next figure.
Next, click the Open HDF5 File button and
open the NeXus HDF5 file lrcs3701.nx5. In the lower left Groups
panel, click the data dataset. Also, under the panel
on the right called Load Dataset Options,
choose No Table as shown. Finally, click the
Load Dataset button (in the Datasets group)
to display the image.
In this image, dark represents low intensity while white represents high intensity. The image has been rotated for easier representation in this manual.
[42] h5py: http://code.google.com/p/h5py
[43] SPEC: http://certif.com/spec.html
[44] generic NeXus data files: NeXus data files for which no application-specific NXDL applies
[45] NeXpy:
http://trac.mcs.anl.gov/projects/nexpy
[47] IgorPro: http://www.wavemetrics.com/