Read Data¶
This example demonstrates how to read the HDF5 output of Luna using h5py and pandas.
Luna Command¶
./tpx3dump process -i /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.tpx3 -o /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.h5 --eps-t 150ns --eps-s 1 --layout single
Python Script¶
1import os, sys
2from typing import *
3import h5py # ensure you have `pip install h5py`
4import pandas as pd # ensure you have `pip install pandas`
5import numpy as np
6# On our system, "EXAMPLE_DATA_HDF5" is the absolute path
7# to an HDF5 file generated by Luna. Replace it with your own!
8sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
9from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
10
11
def load_pixel_hits(hdf5_fname: str) -> pd.DataFrame:
    """
    Read the "PixelHits" entry of an HDF5 file into a DataFrame.

    As a side effect, the list of keys found at the top level of the
    file is printed to stdout.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file (opened read-only).

    Returns:
    --------
    pd.DataFrame
        A DataFrame built from the contents of "PixelHits".
    """
    with h5py.File(hdf5_fname, 'r') as h5:
        top_level_keys = list(h5.keys())
        print(f"hdf5 datasets: {top_level_keys}")
        # Slice the dataset while the file is still open.
        raw_hits = h5["PixelHits"][:]
        return pd.DataFrame(raw_hits)
30
31
def load_clusters(hdf5_fname: str) -> pd.DataFrame:
    """
    Load clusters data from an HDF5 file.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    Returns:
    --------
    pd.DataFrame
        A DataFrame containing the clusters data.

    Raises:
    -------
    KeyError
        The "Clusters" lookup is not guarded, so the error h5py raises
        for a missing entry propagates to the caller.
    """
    # Plain (unparenthesised) `with`, matching the other loaders in this
    # file; the parenthesised form is only valid syntax on newer Pythons.
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        clusters = pd.DataFrame(hdf5_file["Clusters"][:])
    return clusters
49
50
51
def load_timewalk_matrix(DATA_FNAME) -> Optional[pd.DataFrame]:
    """
    Load the timewalk matrix from an HDF5 file.

    The timewalk matrix is a long form matrix with columns:
    - CToT
    - ToT
    - AverageDToA
    - SumSquareDiff
    - Count
    - Std (standard deviation)
    - Sem (Standard error of mean)

    Parameters:
    -----------
    DATA_FNAME : str
        The path to the HDF5 file.

    Returns:
    --------
    Optional[pd.DataFrame]
        A DataFrame containing the timewalk matrix data, or None when the
        file has no "TimewalkMatrix" entry.
    """
    with h5py.File(DATA_FNAME, "r") as f:
        # Guard clause: the dataset is optional, so report its absence
        # with an explicit None instead of a bare `return`.
        if "TimewalkMatrix" not in f:
            return None

        ds = f["TimewalkMatrix"]
        timewalk_matrix = pd.DataFrame(ds[:])
        # Column names live in the "col_names" attribute; the first
        # element of each entry is the name as bytes, so decode it.
        raw_colnames = ds.attrs["col_names"]
        timewalk_matrix.columns = [entry[0].decode() for entry in raw_colnames]

    integer_types = ["CToT", "ToT", "Count"]
    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)
    return timewalk_matrix
89
90
def load_timewalk_lookup_table(DATA_FNAME) -> Optional[pd.DataFrame]:
    """
    Load the timewalk lookup table from an HDF5 file.

    Parameters:
    -----------
    DATA_FNAME : str
        The path to the HDF5 file.

    Returns:
    --------
    Optional[pd.DataFrame]
        A DataFrame containing the timewalk lookup table data, indexed by
        "ToT", or None when the file has no "TimewalkLookupTable" entry.
    """
    with h5py.File(DATA_FNAME, "r") as f:
        # The lookup table is optional; signal its absence with None.
        if "TimewalkLookupTable" not in f:
            return None

        ds = f["TimewalkLookupTable"]
        # Read the data with ds[:] (as the other loaders do) rather than
        # handing the dataset handle itself to pandas.
        lookup_df = pd.DataFrame(ds[:])
        # The first element of each "col_names" entry is the name as bytes.
        lookup_df.columns = [entry[0].decode() for entry in ds.attrs["col_names"]]

    # column names:
    # ToT Average SumSquareDiff Std Count
    # Drop only the rows in which every column is NaN.
    lookup_df = lookup_df.dropna(axis=0, how="all")
    integer_types = ["ToT", "Count"]
    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
    lookup_df = lookup_df.set_index("ToT")

    return lookup_df
120
121
122
if __name__ == "__main__":
    # Render floats with 12 decimal places when printing DataFrames.
    pd.set_option('display.float_format', '{:.12f}'.format)

    # Load everything up front; the two timewalk loaders may return None.
    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5)
    clusters = load_clusters(EXAMPLE_DATA_HDF5)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5)

    # Always-shown tables: first 15 rows of each.
    for heading, table in (("Pixel Hits: ", pixel_hits), ("Clusters: ", clusters)):
        print(heading)
        print(table.head(15).to_string())

    # Optional tables: shown only when the loader returned a DataFrame.
    if timewalk_matrix is not None:
        print("Time walk matrix: ")
        # Top-left corner only (at most 10 rows by 10 columns).
        print(timewalk_matrix.iloc[:10, :10].to_string())

    if timewalk_lut is not None:
        print("Time walk look up table: ")
        print(timewalk_lut.head(15).to_string())
Script Output¶
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits']
Pixel Hits:
toa cid tot dtoa x y tof
0 110345462656250 0 200 0 140 193 -1
1 110345462890625 1 150 0 143 193 -1
2 110345896421875 2 1475 0 68 92 -1
3 110345896453125 2 850 31250 67 92 -1
4 110345896640625 2 300 218750 68 91 -1
5 110345896890625 2 150 468750 67 91 -1
6 110346426421875 3 800 0 80 80 -1
7 110346426640625 3 250 218750 80 81 -1
8 110346426734375 3 250 312500 79 80 -1
9 110346973281250 4 375 0 14 18 -1
10 110346973312500 4 375 31250 14 19 -1
11 110349264015625 5 375 0 49 5 -1
12 110350602046875 6 1675 0 146 106 -1
13 110350602125000 6 425 78125 146 105 -1
14 110350602234375 6 350 187500 145 106 -1
Clusters:
id size ctoa sum_tot ctot cx cy
0 0 1 110345462656250 200 200 140 193
1 1 1 110345462890625 150 150 143 193
2 2 4 110345896421875 2775 1475 68 92
3 3 3 110346426421875 1300 800 80 80
4 4 2 110346973281250 750 375 14 18
5 5 1 110349264015625 375 375 49 5
6 6 4 110350602046875 2600 1675 146 106
7 7 2 110351021093750 1475 1125 142 0
8 8 3 110351021156250 850 425 143 2
9 9 4 110353755625000 1700 700 38 195
10 10 4 110354401906250 2500 925 63 22
11 11 4 110355869171875 2875 1250 91 29
12 12 6 110356705875000 2325 925 245 207
13 13 3 110360353843750 1425 850 174 189
14 14 4 110360842109375 3200 975 141 107