7878 let mut stats = Default :: default ( ) ;
7979 let has_ef = std:: fs:: metadata ( args. src . with_extension ( "ef" ) ) . is_ok_and ( |x| x. is_file ( ) ) ;
8080
81+ // Load the compression flags from the properties file so we can compare them
82+ let ( _, _, comp_flags) = parse_properties :: < E > ( args. src . with_extension ( PROPERTIES_EXTENSION ) ) ?;
83+
8184 if has_ef {
8285 log:: info!(
8386 "Analyzing codes in parallel using {} threads" ,
@@ -163,8 +166,41 @@ where
163166 } ) ;
164167 }
165168
169+ println ! ( "Default codes" ) ;
170+ compare_codes ( & stats, CompFlags :: default ( ) , args. top_k ) ;
171+
172+ print ! ( "\n \n \n " ) ;
173+
174+ println ! ( "Current codes" ) ;
175+ compare_codes ( & stats, comp_flags, args. top_k ) ;
176+
177+ Ok ( ( ) )
178+ }
179+
180+ /// Get the size in bits used by a given code.
181+ /// This should go in dsi-bitstream eventually.
182+ fn get_size_by_code ( stats : & CodesStats , code : Codes ) -> Option < u64 > {
183+ match code {
184+ Codes :: Unary => Some ( stats. unary ) ,
185+ Codes :: Gamma => Some ( stats. gamma ) ,
186+ Codes :: Delta => Some ( stats. delta ) ,
187+ Codes :: Omega => Some ( stats. omega ) ,
188+ Codes :: VByteBe | Codes :: VByteLe => Some ( stats. vbyte ) ,
189+ Codes :: Zeta ( k) => stats. zeta . get ( k - 1 ) . copied ( ) ,
190+ Codes :: Golomb ( b) => stats. golomb . get ( b as usize - 1 ) . copied ( ) ,
191+ Codes :: ExpGolomb ( k) => stats. exp_golomb . get ( k) . copied ( ) ,
192+ Codes :: Rice ( k) => stats. rice . get ( k) . copied ( ) ,
193+ Codes :: Pi ( 0 ) => Some ( stats. gamma ) , // Pi(0) is Gamma
194+ Codes :: Pi ( 1 ) => Some ( stats. zeta [ 1 ] ) , // Pi(1) is Zeta(2)
195+ Codes :: Pi ( k) => stats. pi . get ( k - 2 ) . copied ( ) ,
196+ _ => unreachable ! ( "Code {:?} not supported" , code) ,
197+ }
198+ }
199+
200+ /// Print the statistics of how much the optimal codes improve over the reference ones.
201+ pub fn compare_codes ( stats : & DecoderStats , reference : CompFlags , top_k : usize ) {
166202 macro_rules! impl_best_code {
167- ( $new_bits: expr, $old_bits: expr, $stats: expr, $( $code: ident - $old: expr) ,* ) => {
203+ ( $new_bits: expr, $old_bits: expr, $stats: expr, $( $code: ident -> $old: expr) ,* ) => {
168204 println!( "{:>17} {:>20} {:>12} {:>10} {:>10} {:>16}" ,
169205 "Type" , "Code" , "Improvement" , "Weight" , "Bytes" , "Bits" ,
170206 ) ;
@@ -189,7 +225,7 @@ where
189225 normalize( best_size as f64 / 8.0 ) ,
190226 best_size,
191227 ) ;
192- for i in 1 ..args . top_k. min( codes. len( ) ) . max( 1 ) {
228+ for i in 1 ..top_k. min( codes. len( ) ) . max( 1 ) {
193229 let ( code, size) = codes[ i] ;
194230 let improvement = 100.0 * ( $old as f64 - size as f64 ) / $old as f64 ;
195231 println!( "{:>17} {:>20} {:>12.3}% {:>10.3} {:>10} {:>16}" ,
@@ -206,21 +242,36 @@ where
206242 } ;
207243 }
208244
245+ println ! ( "Code optimization results against:" ) ;
246+ for ( name, code) in [
247+ ( "outdegrees" , reference. outdegrees ) ,
248+ ( "reference offsets" , reference. references ) ,
249+ ( "block counts" , reference. blocks ) ,
250+ ( "blocks" , reference. blocks ) ,
251+ ( "interval counts" , reference. intervals ) ,
252+ ( "interval starts" , reference. intervals ) ,
253+ ( "interval lengths" , reference. intervals ) ,
254+ ( "first residuals" , reference. residuals ) ,
255+ ( "residuals" , reference. residuals ) ,
256+ ] {
257+ println ! ( "\t {:>18} : {:?}" , name, code) ;
258+ }
259+
209260 let mut new_bits = 0 ;
210261 let mut old_bits = 0 ;
211262 impl_best_code ! (
212263 new_bits,
213264 old_bits,
214265 stats,
215- outdegrees - stats. outdegrees. gamma ,
216- reference_offsets - stats. reference_offsets. unary ,
217- block_counts - stats. block_counts. gamma ,
218- blocks - stats. blocks. gamma ,
219- interval_counts - stats. interval_counts. gamma ,
220- interval_starts - stats. interval_starts. gamma ,
221- interval_lens - stats. interval_lens. gamma ,
222- first_residuals - stats. first_residuals. zeta [ 2 ] ,
223- residuals - stats. residuals. zeta [ 2 ]
266+ outdegrees -> get_size_by_code ( & stats. outdegrees, reference . outdegrees ) . unwrap ( ) ,
267+ reference_offsets -> get_size_by_code ( & stats. reference_offsets, reference . references ) . unwrap ( ) ,
268+ block_counts -> get_size_by_code ( & stats. block_counts, reference . blocks ) . unwrap ( ) ,
269+ blocks -> get_size_by_code ( & stats. blocks, reference . blocks ) . unwrap ( ) ,
270+ interval_counts -> get_size_by_code ( & stats. interval_counts, reference . intervals ) . unwrap ( ) ,
271+ interval_starts -> get_size_by_code ( & stats. interval_starts, reference . intervals ) . unwrap ( ) ,
272+ interval_lens -> get_size_by_code ( & stats. interval_lens, reference . intervals ) . unwrap ( ) ,
273+ first_residuals -> get_size_by_code ( & stats. first_residuals, reference . residuals ) . unwrap ( ) ,
274+ residuals -> get_size_by_code ( & stats. residuals, reference . residuals ) . unwrap ( )
224275 ) ;
225276
226277 println ! ( ) ;
@@ -239,7 +290,6 @@ where
239290 " Improvement: {:>15.3}%" ,
240291 100.0 * ( old_bits - new_bits) as f64 / old_bits as f64
241292 ) ;
242- Ok ( ( ) )
243293}
244294
245295fn normalize ( mut value : f64 ) -> String {
0 commit comments