Read data, Unit Conversion

This example demonstrates how to read Luna's HDF5 output with h5py and pandas, and how to convert the ToA time values to any time unit you like.

Luna Command

./tpx3dump process -i /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.tpx3 -o /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.h5 --eps-t 200ns --eps-s 2 --layout single

Python Script

Python code to read HDF5 data and convert the units
  1import os, sys
  2from enum import Enum
  3from typing import Literal
  4
  5import h5py  # ensure you have `pip install h5py`
  6import numpy as np
  7import pandas as pd  # ensure you have `pip install pandas`
  8
  9# on our system "EXAMPLE_DATA_HDF5" refers to the absolute path
 10# to a hdf5 file generated by luna
 11sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 12from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
 13
 14
 15class DetectorType(Enum):
 16    """Enum for specifying the detector type."""
 17    TPX3 = "tpx3"
 18    TPX4 = "tpx4"
 19
 20
 21class TimeUnit(Enum):
 22    """Enum for specifying time units."""
 23    Seconds = 's'
 24    Milliseconds = 'ms'
 25    Microseconds = 'us'
 26    Nanoseconds = 'ns'
 27    Picoseconds = 'ps'
 28    Femtoseconds100 = 'fs100'
 29    Femtoseconds = 'fs'
 30    Attoseconds100 = 'as100'
 31    Attoseconds = 'as'
 32
 33
 34# tpx4 users should change this to DetectorType.TPX4
 35DETECTOR_TYPE: DetectorType = DetectorType.TPX3
 36
 37# toa unit in luna output is 1e-13 in tpx3 and 1e-16 in tpx4
 38INTERNAL_TOA_UNITS = TimeUnit.Femtoseconds100 if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100
 39
 40# in tpx3 tot is in increments of 25ns but in tpx4 tot is the same time resolution as toa.
 41INTERNAL_TOT_UNITS = TimeUnit.Nanoseconds if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100
 42
 43
 44def convert_time_units(value: float | pd.Series | pd.DataFrame, from_unit: TimeUnit, to_unit: TimeUnit):
 45    """
 46    Convert time units from one unit to another.
 47
 48    Parameters:
 49    -----------
 50    value : float | pd.Series | pd.DataFrame
 51        The value(s) to be converted.
 52
 53    from_unit : TimeUnit
 54        The unit of the input value(s).
 55
 56    to_unit : TimeUnit
 57        The unit to convert the value(s) to.
 58
 59    Returns:
 60    --------
 61    float | pd.Series | pd.DataFrame
 62        The converted value(s).
 63    """
 64    conversion_factors = {
 65        TimeUnit.Seconds: 1,
 66        TimeUnit.Milliseconds: 1e-3,
 67        TimeUnit.Microseconds: 1e-6,
 68        TimeUnit.Nanoseconds: 1e-9,
 69        TimeUnit.Picoseconds: 1e-12,
 70        TimeUnit.Femtoseconds100: 1e-13,
 71        TimeUnit.Femtoseconds: 1e-15,
 72        TimeUnit.Attoseconds100: 1e-16,
 73        TimeUnit.Attoseconds: 1e-18,
 74    }
 75
 76    value_in_seconds = value * conversion_factors[from_unit]
 77    return value_in_seconds / conversion_factors[to_unit]
 78
 79
 80def load_pixel_hits(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
 81    """
 82    Load pixel hits data from an HDF5 file and convert time units.
 83
 84    Parameters:
 85    -----------
 86    hdf5_fname : str
 87        The path to the HDF5 file.
 88
 89    toa_unit : TimeUnit
 90        The time unit to convert the 'toa' columns to.
 91
 92    Returns:
 93    --------
 94    pd.DataFrame
 95        A DataFrame containing the pixel hits data with converted time units.
 96    """
 97    with h5py.File(hdf5_fname, 'r') as hdf5_file:
 98        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
 99        pixel_hits = pd.DataFrame(hdf5_file["PixelHits"][:])
100        # Convert 'toa' columns to specified time units
101        pixel_hits['toa'] = convert_time_units(pixel_hits['toa'], INTERNAL_TOA_UNITS, toa_unit)
102    return pixel_hits
103
104
def load_clusters(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
    """
    Load the "Clusters" dataset from an HDF5 file and convert its ToA column.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the 'ctoa' column to.

    Returns:
    --------
    pd.DataFrame
        The clusters, with 'ctoa' expressed in `toa_unit`.
    """
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        # [:] reads the whole dataset into memory, so the file can be
        # closed before any further processing.
        clusters = pd.DataFrame(hdf5_file["Clusters"][:])

    clusters['ctoa'] = convert_time_units(clusters['ctoa'], INTERNAL_TOA_UNITS, toa_unit)
    return clusters
127
128
def load_timewalk_matrix(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk matrix from an HDF5 file and convert time units.

    The timewalk matrix is a long form matrix with columns:
    - CToT
    - ToT
    - AverageDToA
    - SumSquareDiff
    - Count
    - Std (standard deviation)
    - Sem (Standard error of mean)

    The keys of the HDF5 file are printed as a side effect.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns:
    --------
    pd.DataFrame | None
        A DataFrame containing the timewalk matrix data with converted time
        units, or None if the file has no "TimewalkMatrix" dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        print(f.keys())
        if "TimewalkMatrix" not in f:
            return None

        ds = f["TimewalkMatrix"]
        timewalk_matrix = pd.DataFrame(ds[:])
        # Column names are stored in the "col_names" attribute; each entry
        # holds the name as bytes in its first element.
        timewalk_matrix.columns = [entry[0].decode() for entry in ds.attrs["col_names"]]

    integer_types = ["CToT", "ToT", "Count"]
    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)

    # NOTE(review): 'SumSquareDiff' looks like a squared-time quantity; if so,
    # a linear unit factor is not the right conversion for it. It is scaled
    # linearly here, exactly like the other columns - confirm against Luna.
    time_columns = ['AverageDToA', 'SumSquareDiff', 'Std', 'Sem']
    timewalk_matrix[time_columns] = convert_time_units(
        timewalk_matrix[time_columns],
        INTERNAL_TOA_UNITS, toa_unit
    )

    return timewalk_matrix
180
181
def load_timewalk_lookup_table(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk lookup table from an HDF5 file and convert time units.

    The timewalk lookup table contains columns:
    - ToT (becomes the index of the returned DataFrame)
    - Average
    - SumSquareDiff
    - Std (standard deviation)
    - Count

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns:
    --------
    pd.DataFrame | None
        A DataFrame indexed by ToT containing the timewalk lookup table data
        with converted time units, or None if the file has no
        "TimewalkLookupTable" dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        if "TimewalkLookupTable" not in f:
            return None

        ds = f["TimewalkLookupTable"]
        # [:] reads the dataset into memory before the file is closed.
        lookup_df = pd.DataFrame(ds[:])
        # Column names are stored in the "col_names" attribute; each entry
        # holds the name as bytes in its first element.
        lookup_df.columns = [entry[0].decode() for entry in ds.attrs["col_names"]]

    # Drop rows in which every column is NaN.
    lookup_df = lookup_df.dropna(axis=0, how="all")
    integer_types = ["ToT", "Count"]
    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
    lookup_df = lookup_df.set_index("ToT")

    # The average column is documented as "Average", but this code previously
    # selected "AverageDToA" (the name used by the timewalk matrix). Accept
    # either so a naming mismatch does not raise KeyError.
    # NOTE(review): confirm the actual column name written by Luna.
    # NOTE(review): 'SumSquareDiff' looks like a squared-time quantity; it is
    # scaled linearly here, like the other columns - confirm against Luna.
    candidate_time_columns = ('AverageDToA', 'Average', 'SumSquareDiff', 'Std')
    time_columns = [name for name in candidate_time_columns if name in lookup_df.columns]
    if time_columns:
        lookup_df[time_columns] = convert_time_units(
            lookup_df[time_columns],
            INTERNAL_TOA_UNITS, toa_unit
        )

    return lookup_df
227
228
if __name__ == "__main__":
    # Print floats with 12 decimal places so that small time differences
    # remain visible when the values are expressed in seconds.
    pd.set_option('display.float_format', '{:.12f}'.format)
    toa_units = TimeUnit.Seconds

    # Load everything first; the loaders print the file's dataset names.
    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5, toa_units)
    clusters = load_clusters(EXAMPLE_DATA_HDF5, toa_units)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5, toa_unit=toa_units)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5, toa_unit=toa_units)

    print("Pixel Hits: ")
    print(pixel_hits.head(15).to_string())

    print("Clusters: ")
    print(clusters.head(15).to_string())

    # The timewalk datasets are optional; the loaders return None when the
    # file does not contain them.
    if timewalk_matrix is not None:
        print("Time walk matrix: ")
        print(timewalk_matrix.iloc[0:10, 0:10].to_string())

    if timewalk_lut is not None:
        print("Time walk look up table: ")
        print(timewalk_lut.head(15).to_string())

Script Output

Example Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']
<KeysViewHDF5 ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']>
Pixel Hits: 
               toa  cid   tot    dtoa    x    y  tof
0  11.034546265625    0   200       0  140  193   -1
1  11.034546289063    1   150       0  143  193   -1
2  11.034589642188    2  1475       0   68   92   -1
3  11.034589645313    2   850   31250   67   92   -1
4  11.034589664062    2   300  218750   68   91   -1
5  11.034589689062    2   150  468750   67   91   -1
6  11.034642642187    3   800       0   80   80   -1
7  11.034642664063    3   250  218750   80   81   -1
8  11.034642673438    3   250  312500   79   80   -1
9  11.034697328125    4   375       0   14   18   -1
10 11.034697331250    4   375   31250   14   19   -1
11 11.034926401563    5   375       0   49    5   -1
12 11.035060204688    6  1675       0  146  106   -1
13 11.035060212500    6   425   78125  146  105   -1
14 11.035060223438    6   350  187500  145  106   -1
Clusters: 
    id  size            ctoa  sum_tot  ctot   cx   cy
0    0     1 11.034546265625      200   200  140  193
1    1     1 11.034546289063      150   150  143  193
2    2     4 11.034589642188     2775  1475   68   92
3    3     3 11.034642642187     1300   800   80   80
4    4     2 11.034697328125      750   375   14   18
5    5     1 11.034926401563      375   375   49    5
6    6     4 11.035060204688     2600  1675  146  106
7    7     5 11.035102109375     2325  1125  142    0
8    8     4 11.035375562500     1700   700   38  195
9    9     4 11.035440190625     2500   925   63   22
10  10     4 11.035586917187     2875  1250   91   29
11  11     6 11.035670587500     2325   925  245  207
12  12     3 11.036035384375     1425   850  174  189
13  13     4 11.036084210938     3200   975  141  107
14  14     4 11.036241243750     2575  1725  130  239