Read Data

This example demonstrates how to read the HDF5 output of Luna using h5py and pandas.

Luna Command

./tpx3dump process -i /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.tpx3 -o /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.h5 --eps-t 150ns --eps-s 1 --layout single

Python Script

Python code to read HDF5 data
  1import os, sys
  2from typing import *
  3import h5py  # ensure you have `pip install h5py`
  4import pandas as pd  # ensure you have `pip install pandas`
  5import numpy as np
  6# On our system, "EXAMPLE_DATA_HDF5" refers to the absolute path
  7# to an HDF5 file generated by Luna. Replace it with your own!
  8sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  9from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
 10
 11
 12def load_pixel_hits(hdf5_fname: str) -> pd.DataFrame:
 13    """
 14    Load pixel hits data from an HDF5 file.
 15
 16    Parameters:
 17    -----------
 18    hdf5_fname : str
 19        The path to the HDF5 file.
 20
 21    Returns:
 22    --------
 23    pd.DataFrame
 24        A DataFrame containing the pixel hits data.
 25    """
 26    with h5py.File(hdf5_fname, 'r') as hdf5_file:
 27        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
 28        pixel_hits = pd.DataFrame(hdf5_file["PixelHits"][:])
 29    return pixel_hits
 30
 31
 32def load_clusters(hdf5_fname: str) -> pd.DataFrame:
 33    """
 34    Load clusters data from an HDF5 file, if available.
 35
 36    Parameters:
 37    -----------
 38    hdf5_fname : str
 39        The path to the HDF5 file.
 40
 41    Returns:
 42    --------
 43    pd.DataFrame
 44        A DataFrame containing the clusters data.
 45    """
 46    with (h5py.File(hdf5_fname, 'r') as hdf5_file):
 47        clusters = pd.DataFrame(hdf5_file["Clusters"][:])
 48    return clusters
 49
 50
 51
 52def load_timewalk_matrix(DATA_FNAME) -> pd.DataFrame:
 53    """
 54    Load the timewalk matrix from an HDF5 file.
 55
 56    The timewalk matrix is a long form matrix with columns:
 57    - CToT
 58    - ToT
 59    - AverageDToA
 60    - SumSquareDiff
 61    - Count
 62    - Std (standard deviation)
 63    - Sem (Standard error of mean)
 64
 65    Parameters:
 66    -----------
 67    DATA_FNAME : str
 68        The path to the HDF5 file.
 69
 70    Returns:
 71    --------
 72    pd.DataFrame
 73        A DataFrame containing the timewalk matrix data.
 74    """
 75    with h5py.File(DATA_FNAME, "r") as f:
 76        if "TimewalkMatrix" in f.keys():
 77            ds = f["TimewalkMatrix"]
 78            timewalk_matrix = pd.DataFrame(ds[:])
 79            colnames: List[Tuple[bytes]] = ds.attrs["col_names"]
 80            # decode from bytes
 81            colnames: List[str] = [i[0].decode() for i in colnames]
 82            timewalk_matrix.columns = colnames
 83        else:
 84            return
 85
 86    integer_types = ["CToT", "ToT", "Count"]
 87    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)
 88    return timewalk_matrix
 89
 90
 91def load_timewalk_lookup_table(DATA_FNAME) -> pd.DataFrame:
 92    """
 93    Load the timewalk lookup table from an HDF5 file.
 94
 95    Parameters:
 96    -----------
 97    DATA_FNAME : str
 98        The path to the HDF5 file.
 99
100    Returns:
101    --------
102    pd.DataFrame
103        A DataFrame containing the timewalk lookup table data.
104    """
105    with h5py.File(DATA_FNAME, "r") as f:
106        if "TimewalkLookupTable" in f.keys():
107            ds = f["TimewalkLookupTable"]
108            lookup_df = pd.DataFrame(ds)
109            lookup_df.columns = [i[0].decode() for i in ds.attrs["col_names"]]
110        else:
111            return None
112    # column names:
113    # ToT Average SumSquareDiff Std Count
114    lookup_df = lookup_df.dropna(axis=0, how="all")
115    integer_types = ["ToT", "Count"]
116    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
117    lookup_df.set_index("ToT", inplace=True)
118
119    return lookup_df
120
121
122
if __name__ == "__main__":
    # Render floats with 12 decimal places.
    pd.set_option('display.float_format', '{:.12f}'.format)

    # Load everything first; load_pixel_hits prints the file's keys.
    hits_df = load_pixel_hits(EXAMPLE_DATA_HDF5)
    clusters_df = load_clusters(EXAMPLE_DATA_HDF5)
    matrix_df = load_timewalk_matrix(EXAMPLE_DATA_HDF5)
    lut_df = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5)

    # Tables that are always printed: title followed by the first 15 rows.
    for title, frame in (("Pixel Hits: ", hits_df), ("Clusters: ", clusters_df)):
        print(title)
        print(frame.head(15).to_string())

    # The timewalk tables are optional; the loaders return None when absent.
    if matrix_df is not None:
        print("Time walk matrix: ")
        print(matrix_df.iloc[0:10, 0:10].to_string())

    if lut_df is not None:
        print("Time walk look up table: ")
        print(lut_df.head(15).to_string())

Script Output

Example Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']
Pixel Hits: 
                toa  cid   tot    dtoa    x    y  tof
0   110345462656250    0   200       0  140  193   -1
1   110345462890625    1   150       0  143  193   -1
2   110345896421875    2  1475       0   68   92   -1
3   110345896453125    2   850   31250   67   92   -1
4   110345896640625    2   300  218750   68   91   -1
5   110345896890625    2   150  468750   67   91   -1
6   110346426421875    3   800       0   80   80   -1
7   110346426640625    3   250  218750   80   81   -1
8   110346426734375    3   250  312500   79   80   -1
9   110346973281250    4   375       0   14   18   -1
10  110346973312500    4   375   31250   14   19   -1
11  110349264015625    5   375       0   49    5   -1
12  110350602046875    6  1675       0  146  106   -1
13  110350602125000    6   425   78125  146  105   -1
14  110350602234375    6   350  187500  145  106   -1
Clusters: 
    id  size             ctoa  sum_tot  ctot   cx   cy
0    0     1  110345462656250      200   200  140  193
1    1     1  110345462890625      150   150  143  193
2    2     4  110345896421875     2775  1475   68   92
3    3     3  110346426421875     1300   800   80   80
4    4     2  110346973281250      750   375   14   18
5    5     1  110349264015625      375   375   49    5
6    6     4  110350602046875     2600  1675  146  106
7    7     2  110351021093750     1475  1125  142    0
8    8     3  110351021156250      850   425  143    2
9    9     4  110353755625000     1700   700   38  195
10  10     4  110354401906250     2500   925   63   22
11  11     4  110355869171875     2875  1250   91   29
12  12     6  110356705875000     2325   925  245  207
13  13     3  110360353843750     1425   850  174  189
14  14     4  110360842109375     3200   975  141  107