Read Data, Unit Conversion
This example demonstrates how to read the HDF5 output of Luna with h5py and pandas, and how to convert the ToA (time of arrival) values to the time unit of your choice.
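The conversion itself is just a scale factor: Luna stores ToA in a fixed internal unit (1e-13 s for TPX3 and 1e-16 s for TPX4, matching the constants defined in the script below), so moving to any other unit is one multiplication and one division. A minimal sketch of the idea, using the first ToA value from the example output below as an illustration:

raw_toa = 110345462656250             # first pixel hit below, in Luna's internal 1e-13 s units (TPX3)
toa_seconds = raw_toa * 1e-13         # -> 11.034546265625 s
toa_nanoseconds = toa_seconds / 1e-9  # -> 11034546265.625 ns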
Luna Command
./tpx3dump process -i example_data.tpx3 -o example_data.h5 --eps-t 200ns --eps-s 2 --layout single
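Before running the full Python script below, you can quickly confirm which datasets the resulting HDF5 file contains (a minimal sketch; the filename mirrors the command above, so substitute the path to your own output file):

import h5py

with h5py.File("example_data.h5", "r") as f:
    print(list(f.keys()))  # e.g. ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']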
Python Script
import os, sys
from enum import Enum

import h5py  # ensure you have `pip install h5py`
import numpy as np
import pandas as pd  # ensure you have `pip install pandas`

# On our system, "EXAMPLE_DATA_HDF5" refers to the absolute path
# to an HDF5 file generated by Luna.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5


class DetectorType(Enum):
    """Enum for specifying the detector type."""
    TPX3 = "tpx3"
    TPX4 = "tpx4"


class TimeUnit(Enum):
    """Enum for specifying time units."""
    Seconds = 's'
    Milliseconds = 'ms'
    Microseconds = 'us'
    Nanoseconds = 'ns'
    Picoseconds = 'ps'
    Femtoseconds100 = 'fs100'
    Femtoseconds = 'fs'
    Attoseconds100 = 'as100'
    Attoseconds = 'as'


# TPX4 users should change this to DetectorType.TPX4.
DETECTOR_TYPE: DetectorType = DetectorType.TPX3

# The ToA unit in Luna output is 1e-13 s for TPX3 and 1e-16 s for TPX4.
INTERNAL_TOA_UNITS = TimeUnit.Femtoseconds100 if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100

# In TPX3, ToT comes in increments of 25 ns; in TPX4, ToT has the same time resolution as ToA.
INTERNAL_TOT_UNITS = TimeUnit.Nanoseconds if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100


def convert_time_units(value: float | pd.Series | pd.DataFrame, from_unit: TimeUnit, to_unit: TimeUnit):
    """
    Convert time units from one unit to another.

    Parameters
    ----------
    value : float | pd.Series | pd.DataFrame
        The value(s) to be converted.

    from_unit : TimeUnit
        The unit of the input value(s).

    to_unit : TimeUnit
        The unit to convert the value(s) to.

    Returns
    -------
    float | pd.Series | pd.DataFrame
        The converted value(s).
    """
    conversion_factors = {
        TimeUnit.Seconds: 1,
        TimeUnit.Milliseconds: 1e-3,
        TimeUnit.Microseconds: 1e-6,
        TimeUnit.Nanoseconds: 1e-9,
        TimeUnit.Picoseconds: 1e-12,
        TimeUnit.Femtoseconds100: 1e-13,
        TimeUnit.Femtoseconds: 1e-15,
        TimeUnit.Attoseconds100: 1e-16,
        TimeUnit.Attoseconds: 1e-18,
    }

    value_in_seconds = value * conversion_factors[from_unit]
    return value_in_seconds / conversion_factors[to_unit]


def load_pixel_hits(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
    """
    Load pixel hit data from an HDF5 file and convert time units.

    Parameters
    ----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the 'toa' column to.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the pixel hit data with converted time units.
    """
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
        pixel_hits = pd.DataFrame(hdf5_file["PixelHits"][:])
        # Convert the 'toa' column to the requested time unit
        pixel_hits['toa'] = convert_time_units(pixel_hits['toa'], INTERNAL_TOA_UNITS, toa_unit)
        return pixel_hits


def load_clusters(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
    """
    Load cluster data from an HDF5 file and convert time units.

    Parameters
    ----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the 'ctoa' column to.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the cluster data with converted time units.
    """
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        clusters = pd.DataFrame(hdf5_file["Clusters"][:])
        # Convert the 'ctoa' column to the requested time unit
        clusters['ctoa'] = convert_time_units(clusters['ctoa'], INTERNAL_TOA_UNITS, toa_unit)
        return clusters


def load_timewalk_matrix(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk matrix from an HDF5 file and convert time units.

    The timewalk matrix is a long-form matrix with columns:

    - CToT
    - ToT
    - AverageDToA
    - SumSquareDiff
    - Count
    - Std (standard deviation)
    - Sem (standard error of the mean)

    Parameters
    ----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns
    -------
    pd.DataFrame | None
        A DataFrame containing the timewalk matrix with converted time units,
        or None if the file contains no "TimewalkMatrix" dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        print(f.keys())
        if "TimewalkMatrix" in f.keys():
            ds = f["TimewalkMatrix"]
            timewalk_matrix = pd.DataFrame(ds[:])
            # Column names are stored as bytes in the dataset attributes; decode them
            colnames = ds.attrs["col_names"]
            colnames = [i[0].decode() for i in colnames]
            timewalk_matrix.columns = colnames
        else:
            return None

    # All column names:
    # CToT, ToT, AverageDToA, SumSquareDiff, Count, Std, Sem

    integer_types = ["CToT", "ToT", "Count"]
    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)

    time_columns = ['AverageDToA', 'SumSquareDiff', 'Std', 'Sem']  # time columns
    timewalk_matrix[time_columns] = convert_time_units(
        timewalk_matrix[time_columns],
        INTERNAL_TOA_UNITS, toa_unit
    )

    return timewalk_matrix


def load_timewalk_lookup_table(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk lookup table from an HDF5 file and convert time units.

    The timewalk lookup table contains columns:

    - ToT
    - Average
    - SumSquareDiff
    - Std (standard deviation)
    - Count

    Parameters
    ----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns
    -------
    pd.DataFrame | None
        A DataFrame containing the timewalk lookup table with converted time units,
        or None if the file contains no "TimewalkLookupTable" dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        if "TimewalkLookupTable" in f.keys():
            ds = f["TimewalkLookupTable"]
            lookup_df = pd.DataFrame(ds[:])
            lookup_df.columns = [i[0].decode() for i in ds.attrs["col_names"]]
        else:
            return None

    # Column names:
    # ToT, Average, SumSquareDiff, Std, Count
    lookup_df = lookup_df.dropna(axis=0, how="all")
    integer_types = ["ToT", "Count"]
    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
    lookup_df.set_index("ToT", inplace=True)

    time_columns = ['AverageDToA', 'SumSquareDiff', 'Std']  # time columns
    lookup_df[time_columns] = convert_time_units(
        lookup_df[time_columns],
        INTERNAL_TOA_UNITS, toa_unit
    )

    return lookup_df


if __name__ == "__main__":
    pd.set_option('display.float_format', '{:.12f}'.format)
    toa_units = TimeUnit.Seconds

    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5, toa_units)
    clusters = load_clusters(EXAMPLE_DATA_HDF5, toa_units)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5, toa_unit=toa_units)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5, toa_unit=toa_units)

    print("Pixel Hits: ")
    print(pixel_hits.head(15).to_string())

    print("Clusters: ")
    print(clusters.head(15).to_string())

    if timewalk_matrix is not None:
        print("Time walk matrix: ")
        print(timewalk_matrix.iloc[0:10, 0:10].to_string())

    if timewalk_lut is not None:
        print("Time walk look up table: ")
        print(timewalk_lut.head(15).to_string())
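Because the loaded objects are ordinary pandas DataFrames, you can keep converting units after the fact with the same helper. For example, to view the already-loaded ToA values in nanoseconds rather than seconds (a sketch reusing convert_time_units and the pixel_hits DataFrame from the script above):

# pixel_hits['toa'] is in seconds after load_pixel_hits(EXAMPLE_DATA_HDF5, TimeUnit.Seconds)
toa_ns = convert_time_units(pixel_hits['toa'], TimeUnit.Seconds, TimeUnit.Nanoseconds)
print(toa_ns.head())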
Script Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']
<KeysViewHDF5 ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']>
Pixel Hits:
                toa  cid   tot    dtoa    x    y  tof
 0  11.034546265625    0   200       0  140  193   -1
 1  11.034546289063    1   150       0  143  193   -1
 2  11.034589642188    2  1475       0   68   92   -1
 3  11.034589645313    2   850   31250   67   92   -1
 4  11.034589664062    2   300  218750   68   91   -1
 5  11.034589689062    2   150  468750   67   91   -1
 6  11.034642642187    3   800       0   80   80   -1
 7  11.034642664063    3   250  218750   80   81   -1
 8  11.034642673438    3   250  312500   79   80   -1
 9  11.034697328125    4   375       0   14   18   -1
10  11.034697331250    4   375   31250   14   19   -1
11  11.034926401563    5   375       0   49    5   -1
12  11.035060204688    6  1675       0  146  106   -1
13  11.035060212500    6   425   78125  146  105   -1
14  11.035060223438    6   350  187500  145  106   -1
Clusters:
    id  size             ctoa  sum_tot  ctot   cx   cy
 0   0     1  11.034546265625      200   200  140  193
 1   1     1  11.034546289063      150   150  143  193
 2   2     4  11.034589642188     2775  1475   68   92
 3   3     3  11.034642642187     1300   800   80   80
 4   4     2  11.034697328125      750   375   14   18
 5   5     1  11.034926401563      375   375   49    5
 6   6     4  11.035060204688     2600  1675  146  106
 7   7     5  11.035102109375     2325  1125  142    0
 8   8     4  11.035375562500     1700   700   38  195
 9   9     4  11.035440190625     2500   925   63   22
10  10     4  11.035586917187     2875  1250   91   29
11  11     6  11.035670587500     2325   925  245  207
12  12     3  11.036035384375     1425   850  174  189
13  13     4  11.036084210938     3200   975  141  107
14  14     4  11.036241243750     2575  1725  130  239
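From here the DataFrames can be analysed with ordinary pandas operations. For example, a quick look at the clusters loaded above (a sketch that only uses the columns shown in the output):

# Summary statistics of cluster size and ToT
print(clusters[['size', 'sum_tot', 'ctot']].describe())

# Time difference between consecutive clusters, in seconds
print(clusters['ctoa'].diff().head())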