11"""Benchmarks for VOTable binary/binary2 parsing performance."""
22import io
3- import os
4- import tempfile
5-
63import numpy as np
74from astropy .io .votable import parse , from_table
85from astropy .table import Table
2017id_data = np .arange (LARGE_SIZE , dtype = np .int64 )
2118flag_data = np .random .choice ([True , False ], LARGE_SIZE )
2219quality_data = np .random .randint (0 , 256 , LARGE_SIZE , dtype = np .uint8 )
20+ bool_data = np .random .randint (0 , 2 , LARGE_SIZE ).astype (bool )
2321
2422short_names = np .array ([f"OBJ_{ i :08d} " for i in range (LARGE_SIZE )])
25- filter_names = np .random .choice (['u' , 'g' , 'r' , 'i' , 'z' , 'Y' ], LARGE_SIZE )
23+ filter_names = np .random .choice (["u" , "g" , "r" , "i" , "z" , "Y" ], LARGE_SIZE )
2624classifications = np .random .choice (
27- ['STAR' , 'GALAXY' , 'QSO' , 'UNKNOWN' ], LARGE_SIZE
25+ ["STAR" , "GALAXY" , "QSO" , "UNKNOWN" ], LARGE_SIZE )
26+ long_descriptions = np .array (
27+ [
28+ f"Extend description about a field { i // 1000 :04d} "
29+ for i in range (LARGE_SIZE )
30+ ]
2831)
29- long_descriptions = np .array ([
30- f"Extend description about a field { i // 1000 :04d} "
31- for i in range (LARGE_SIZE )
32- ])
3332
3433
def create_votable_bytes(
    table_data,
    format_type="binary2",
    bitarray_size=None,
):
    """Serialize ``table_data`` as a VOTable and return the XML as bytes.

    Parameters
    ----------
    table_data
        Table handed to ``from_table`` to build the VOTable document.
    format_type
        Value forwarded to ``to_xml`` as ``tabledata_format``
        (the callers in this module use ``"binary"`` and ``"binary2"``).
    bitarray_size
        When not ``None``, every field of the first table whose
        ``datatype`` equals ``"bit"`` gets its ``arraysize`` replaced by
        ``str(bitarray_size)`` before serialization. Fields with any
        other datatype are left untouched.

    Returns
    -------
    bytes
        Everything ``to_xml`` wrote into an in-memory buffer.
    """
    vot = from_table(table_data)

    if bitarray_size is not None:
        # NOTE(review): this only has an effect if ``from_table`` produced
        # fields with datatype "bit"; boolean columns may be exposed as
        # "boolean" instead -- confirm, otherwise this branch is a no-op.
        bit_fields = (
            candidate
            for candidate in vot.get_first_table().fields
            if candidate.datatype == "bit"
        )
        for bit_field in bit_fields:
            bit_field.arraysize = str(bitarray_size)

    buffer = io.BytesIO()
    vot.to_xml(buffer, tabledata_format=format_type)
    return buffer.getvalue()
@@ -52,13 +61,15 @@ def setup(self):
5261 flux_data [:LARGE_SIZE ],
5362 count_data [:LARGE_SIZE ],
5463 id_data [:LARGE_SIZE ],
55- quality_data [:LARGE_SIZE ]
64+ quality_data [:LARGE_SIZE ],
5665 ],
57- names = ['ra' , ' dec' , ' mag' , ' flux' , ' counts' , 'id' , ' quality' ]
66+ names = ["ra" , " dec" , " mag" , " flux" , " counts" , "id" , " quality" ],
5867 )
5968
60- self .binary_data = create_votable_bytes (table , 'binary' )
61- self .binary2_data = create_votable_bytes (table , 'binary2' )
69+ self .binary_data = create_votable_bytes (
70+ table , "binary" , bitarray_size = 8 )
71+ self .binary2_data = create_votable_bytes (
72+ table , "binary2" , bitarray_size = 8 )
6273
6374 def time_numeric_binary (self ):
6475 parse (io .BytesIO (self .binary_data ))
@@ -78,13 +89,13 @@ def setup(self):
7889 short_names [:LARGE_SIZE ],
7990 filter_names [:LARGE_SIZE ],
8091 classifications [:LARGE_SIZE ],
81- mag_data [:LARGE_SIZE ]
92+ mag_data [:LARGE_SIZE ],
8293 ],
83- names = ['ra' , ' dec' , ' object_id' , ' filter' , ' class' , ' mag' ]
94+ names = ["ra" , " dec" , " object_id" , " filter" , " class" , " mag" ],
8495 )
8596
86- self .binary_data = create_votable_bytes (table , ' binary' )
87- self .binary2_data = create_votable_bytes (table , ' binary2' )
97+ self .binary_data = create_votable_bytes (table , " binary" )
98+ self .binary2_data = create_votable_bytes (table , " binary2" )
8899
89100 def time_short_strings_binary (self ):
90101 parse (io .BytesIO (self .binary_data ))
@@ -102,13 +113,13 @@ def setup(self):
102113 ra_data [:LARGE_SIZE ],
103114 dec_data [:LARGE_SIZE ],
104115 long_descriptions [:LARGE_SIZE ],
105- mag_data [:LARGE_SIZE ]
116+ mag_data [:LARGE_SIZE ],
106117 ],
107- names = ['ra' , ' dec' , ' description' , ' mag' ]
118+ names = ["ra" , " dec" , " description" , " mag" ],
108119 )
109120
110- self .binary_data = create_votable_bytes (table , ' binary' )
111- self .binary2_data = create_votable_bytes (table , ' binary2' )
121+ self .binary_data = create_votable_bytes (table , " binary" )
122+ self .binary2_data = create_votable_bytes (table , " binary2" )
112123
113124 def time_long_strings_binary (self ):
114125 parse (io .BytesIO (self .binary_data ))
@@ -126,19 +137,25 @@ def setup(self):
126137 short_names [:LARGE_SIZE ],
127138 filter_names [:LARGE_SIZE ],
128139 classifications [:LARGE_SIZE ],
129- np .random .choice (['A' , 'B' , 'C' , 'D' ], LARGE_SIZE ),
130- np .random .choice ([' HIGH' , ' MED' , ' LOW' ], LARGE_SIZE ),
140+ np .random .choice (["A" , "B" , "C" , "D" ], LARGE_SIZE ),
141+ np .random .choice ([" HIGH" , " MED" , " LOW" ], LARGE_SIZE ),
131142 long_descriptions [:LARGE_SIZE ],
132143 ra_data [:LARGE_SIZE ],
133- dec_data [:LARGE_SIZE ]
144+ dec_data [:LARGE_SIZE ],
134145 ],
135146 names = [
136- 'id' , 'filter' , 'class' , 'grade' ,
137- 'priority' , 'desc' , 'ra' , 'dec'
138- ]
147+ "id" ,
148+ "filter" ,
149+ "class" ,
150+ "grade" ,
151+ "priority" ,
152+ "desc" ,
153+ "ra" ,
154+ "dec" ,
155+ ],
139156 )
140157
141- self .binary2_data = create_votable_bytes (table , ' binary2' )
158+ self .binary2_data = create_votable_bytes (table , " binary2" )
142159
143160 def time_string_intensive_binary2 (self ):
144161 parse (io .BytesIO (self .binary2_data ))
@@ -162,13 +179,21 @@ def setup(self):
162179 np .random .choice ([True , False ], LARGE_SIZE ),
163180 ],
164181 names = [
165- 'ra' , 'dec' , 'saturated' , 'flagged' , 'edge_pixel' ,
166- 'cosmic_ray' , 'variable' , 'extended' , 'public' , 'calibrated'
167- ]
182+ "ra" ,
183+ "dec" ,
184+ "saturated" ,
185+ "flagged" ,
186+ "edge_pixel" ,
187+ "cosmic_ray" ,
188+ "variable" ,
189+ "extended" ,
190+ "public" ,
191+ "calibrated" ,
192+ ],
168193 )
169194
170- self .binary_data = create_votable_bytes (table , ' binary' )
171- self .binary2_data = create_votable_bytes (table , ' binary2' )
195+ self .binary_data = create_votable_bytes (table , " binary" )
196+ self .binary2_data = create_votable_bytes (table , " binary2" )
172197
173198 def time_booleans_binary (self ):
174199 parse (io .BytesIO (self .binary_data ))
@@ -177,6 +202,57 @@ def time_booleans_binary2(self):
177202 parse (io .BytesIO (self .binary2_data ))
178203
179204
class TimeVOTableBitArrayOptimization:
    """Benchmark BitArray columns in Binary/Binary2 VOTables.

    ``setup`` builds one table (``ra``, ``dec``, ``mag`` plus four random
    boolean flag columns) and serializes it once for every combination of
    format (``"binary"``, ``"binary2"``) and requested bit arraysize
    (8, 16). Each ``time_*`` method parses one of those in-memory payloads.

    NOTE(review): the 8 and 16 variants differ only if
    ``create_votable_bytes`` finds fields whose datatype is ``"bit"``;
    confirm that the boolean columns built here are exposed that way.
    """

    def setup(self):
        """Build the flag table and pre-serialize all four payloads."""
        flag_names = ["detected", "saturated", "edge_pixel", "cosmic_ray"]
        # One independent random boolean column per flag name.
        flag_columns = [
            np.random.randint(0, 2, LARGE_SIZE).astype(bool)
            for _ in flag_names
        ]

        table = Table(
            [
                ra_data[:LARGE_SIZE],
                dec_data[:LARGE_SIZE],
                mag_data[:LARGE_SIZE],
                *flag_columns,
            ],
            names=["ra", "dec", "mag", *flag_names],
        )

        # Pass the size by keyword and as an int, matching the other
        # create_votable_bytes call sites in this module; the helper
        # stringifies it, so the serialized output is unchanged.
        self.binary_bitarray_8_data = create_votable_bytes(
            table, "binary", bitarray_size=8
        )
        self.binary_bitarray_16_data = create_votable_bytes(
            table, "binary", bitarray_size=16
        )
        self.binary2_bitarray_8_data = create_votable_bytes(
            table, "binary2", bitarray_size=8
        )
        self.binary2_bitarray_16_data = create_votable_bytes(
            table, "binary2", bitarray_size=16
        )

    def time_bitarray_8bit_binary(self):
        """Parse the "binary" payload built with bitarray_size=8."""
        parse(io.BytesIO(self.binary_bitarray_8_data))

    def time_bitarray_16bit_binary(self):
        """Parse the "binary" payload built with bitarray_size=16."""
        parse(io.BytesIO(self.binary_bitarray_16_data))

    def time_bitarray_8bit_binary2(self):
        """Parse the "binary2" payload built with bitarray_size=8."""
        parse(io.BytesIO(self.binary2_bitarray_8_data))

    def time_bitarray_16bit_binary2(self):
        """Parse the "binary2" payload built with bitarray_size=16."""
        parse(io.BytesIO(self.binary2_bitarray_16_data))
254+
255+
180256class TimeVOTableMixed :
181257 """Benchmark for a table with mixed fields types."""
182258
@@ -195,13 +271,21 @@ def setup(self):
195271 flag_data [:LARGE_SIZE ],
196272 ],
197273 names = [
198- 'ra' , 'dec' , 'id' , 'mag' , 'flux' ,
199- 'filter' , 'class' , 'counts' , 'quality' , 'detected'
200- ]
274+ "ra" ,
275+ "dec" ,
276+ "id" ,
277+ "mag" ,
278+ "flux" ,
279+ "filter" ,
280+ "class" ,
281+ "counts" ,
282+ "quality" ,
283+ "detected" ,
284+ ],
201285 )
202286
203- self .binary_data = create_votable_bytes (table , ' binary' )
204- self .binary2_data = create_votable_bytes (table , ' binary2' )
287+ self .binary_data = create_votable_bytes (table , " binary" )
288+ self .binary2_data = create_votable_bytes (table , " binary2" )
205289
206290 def time_mixed_binary (self ):
207291 parse (io .BytesIO (self .binary_data ))
@@ -218,13 +302,13 @@ def setup(self):
218302 [
219303 ra_data [:SMALL_SIZE ],
220304 dec_data [:SMALL_SIZE ],
221- mag_data [:SMALL_SIZE ]
305+ mag_data [:SMALL_SIZE ],
222306 ],
223- names = ['ra' , ' dec' , ' mag' ]
307+ names = ["ra" , " dec" , " mag" ],
224308 )
225309
226- self .binary_data = create_votable_bytes (table , ' binary' )
227- self .binary2_data = create_votable_bytes (table , ' binary2' )
310+ self .binary_data = create_votable_bytes (table , " binary" )
311+ self .binary2_data = create_votable_bytes (table , " binary2" )
228312
229313 def time_small_binary (self ):
230314 parse (io .BytesIO (self .binary_data ))
0 commit comments