diff --git a/cpp/examples/parquet/CMakeLists.txt b/cpp/examples/parquet/CMakeLists.txt
index cc8f5a98150..7cbbf8efa0a 100644
--- a/cpp/examples/parquet/CMakeLists.txt
+++ b/cpp/examples/parquet/CMakeLists.txt
@@ -21,6 +21,15 @@
 add_executable(parquet-arrow-example parquet-arrow/reader-writer.cc)
 add_executable(parquet-stream-api-example parquet-stream-api/stream-reader-writer.cc)
 target_include_directories(parquet-low-level-example PRIVATE low-level-api/)
 target_include_directories(parquet-low-level-example2 PRIVATE low-level-api/)
+add_executable(parquet-with-index-page-skipping low-level-api/reader-writer-with-index.cc)
+add_executable(parquet-reader-with-pageindex low-level-api/reader-with-index.cc)
+add_executable(parquet-writer-with-pageindex low-level-api/writer-with-index.cc)
+target_include_directories(parquet-with-index-page-skipping PRIVATE low-level-api/)
+target_include_directories(parquet-reader-with-pageindex PRIVATE low-level-api/)
+target_include_directories(parquet-writer-with-pageindex PRIVATE low-level-api/)
+target_link_libraries(parquet-low-level-example parquet_static)
+target_link_libraries(parquet-low-level-example2 parquet_static)
+target_link_libraries(parquet-with-index-page-skipping parquet_static)
 # The variables in these files are for illustration purposes
 set(PARQUET_EXAMPLES_WARNING_SUPPRESSIONS
@@ -47,6 +56,8 @@ if(UNIX)
                  PROPERTY COMPILE_FLAGS "-Wno-unused-variable")
   endforeach()
 endif()
+target_link_libraries(parquet-reader-with-pageindex parquet_static)
+target_link_libraries(parquet-writer-with-pageindex parquet_static)
 
 # Prefer shared linkage but use static if shared build is deactivated
 if (ARROW_BUILD_SHARED)
@@ -76,3 +86,7 @@ if (PARQUET_REQUIRE_ENCRYPTION)
                               parquet-encryption-example
                               parquet-encryption-example-all-crypto-options)
 endif()
+                              parquet-with-index-page-skipping
+                              parquet-writer-with-pageindex
+                              parquet-reader-with-pageindex
+                              parquet-arrow-example)
diff --git a/cpp/examples/parquet/low-level-api/page-index-reader-test.sh b/cpp/examples/parquet/low-level-api/page-index-reader-test.sh
new file mode 100755
index 00000000000..ddab6d0b5d8
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/page-index-reader-test.sh
@@ -0,0 +1,15 @@
+## member queries
+echo "Launching member queries..."
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_m_sorted.parquet 1000000 &
+
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_m_unsorted.parquet 1000000 &
+
+## non-member queries
+echo "Launching non-member queries..."
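+# Editorial note: the commented-out perf/iostat lines at the end of this script
+# refer to $pid, which is never set. A hypothetical capture of the PID of the
+# last backgrounded query (not in the original script) would be: pid=$!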
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_n_sorted.parquet 10000000 &
+
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_n_unsorted.parquet 10000000 &
+
+#perf record -ag -e faults -p $pid
+
+#iostat -k 1 -p sda > ~/parquet_data/debug_read_writes
\ No newline at end of file
diff --git a/cpp/examples/parquet/low-level-api/page-index-writer-test.sh b/cpp/examples/parquet/low-level-api/page-index-writer-test.sh
new file mode 100644
index 00000000000..c37b68e3aa7
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/page-index-writer-test.sh
@@ -0,0 +1,3 @@
+cd ~/parquet_data/
+
+$ARROW_HOME/build/debug/parquet-writer-with-pageindex $1 $2
\ No newline at end of file
diff --git a/cpp/examples/parquet/low-level-api/reader-with-index.cc b/cpp/examples/parquet/low-level-api/reader-with-index.cc
new file mode 100644
index 00000000000..1025cba87ce
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/reader-with-index.cc
@@ -0,0 +1,1216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// System headers reconstructed from usage in this file (the angle-bracket
+// include targets were lost in the patch text):
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "parquet/api/reader.h"
+#include "parquet/column_reader.h"
+#include "parquet/column_scanner.h"
+#include "parquet/deprecated_io.h"
+#include "parquet/exception.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+#include "parquet/parquet_types.h"
+#include "parquet/file_reader.h"
+#include <unistd.h>
+
+using parquet::ConvertedType;
+using parquet::Repetition;
+using parquet::Type;
+using parquet::schema::GroupNode;
+using parquet::schema::PrimitiveNode;
+
+/*
+ * This example illustrates PARQUET-1404, page-level skipping when writing and
+ * reading Parquet files in C++, and serves as a reference for the reader and
+ * writer API enhanced with the Column Index and Offset Index.
+ * The file contains all the physical data types supported by Parquet.
+ * This example uses the RowGroupWriter API, which supports writing RowGroups
+ * based on a certain size.
+ **/
+
+/* Parquet is a structured columnar file format
+ * Parquet File = "Parquet data" + "Parquet Metadata"
+ * "Parquet data" is simply a vector of RowGroups.
+ * Each RowGroup is a batch of rows in a columnar layout
+ * "Parquet Metadata" contains the "file schema" and attributes of the RowGroups
+ * and their Columns
+ * "file schema" is a tree where each node is either a primitive type (leaf
+ * nodes) or a complex (nested) type (internal nodes)
+ * For specific details, please refer to the format here:
+ * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+ **/
+
+/*********************************************************************************
+                   PARQUET READER WITH PAGE SKIPPING EXAMPLE
+**********************************************************************************/
+
+constexpr int NUM_ROWS = 20;  // 2500000;
+constexpr int64_t ROW_GROUP_SIZE =
+    (sizeof(uint32_t) + sizeof(int32_t) + sizeof(int64_t) + sizeof(float) +
+     sizeof(double) + sizeof(parquet::ByteArray) + sizeof(parquet::FixedLenByteArray)) *
+    NUM_ROWS;  // 16 * 1024 * 1024; // 16 MB
+// char PARQUET_FILENAME[] = "";
+// const char PARQUET_FILENAME[] = "/home/abalajiee/parquet_data/testing_write.parquet";
+
+int parseLine(char* line) {
+  // Extract the integer from a /proc line. This assumes a digit will be found.
+  // /proc/self/status lines end in " kB"; /proc/self/io lines end right after
+  // the digits, so only strip the trailing unit when one is present.
+  int i = strlen(line);
+  const char* p = line;
+  while (*p < '0' || *p > '9') p++;
+  if (i >= 3 && line[i - 2] == 'B') line[i - 3] = '\0';  // strip " kB"
+  i = atoi(p);
+  return i;
+}
+
+int getMemValue() {  // Note: this value is in kB!
+  FILE* file = fopen("/proc/self/status", "r");
+  int result = -1;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "VmRSS:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getReadBytesValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "read_bytes:", 11) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getReadBytesCacheValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "rchar:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getWriteBytesValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+  int trunc = 0;
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "write_bytes:", 12) == 0) {
+      result = parseLine(line);
+      continue;  // keep scanning: cancelled_write_bytes follows write_bytes
+    }
+    if (strncmp(line, "cancelled_write_bytes:", 22) == 0) {
+      trunc = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result - trunc;
+}
+
+int getWriteBytesCacheValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "wchar:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+/* Sample /proc/self/io contents:
+rchar: 91439151
+wchar: 986032
+syscr: 54376
+syscw: 45314
+read_bytes: 17989632
+write_bytes: 626688
+cancelled_write_bytes: 233472
+*/
+
+// Holds the typed predicate value alongside the reader created for the column.
+struct return_multiple {
+  std::shared_ptr<parquet::ColumnReader> column_reader;
+  bool b;           // BOOLEAN predicate
+  int32_t p;        // INT32 predicate
+  int64_t r;        // INT64 predicate
+  uint32_t e;       // INT96 predicate
+  float d;          // FLOAT predicate
+  double i;         // DOUBLE predicate (was float; widened so no precision is lost)
+  char *c, *a, *t;  // BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY predicates
+};
+
+typedef struct time_to_run {
+  float wo_index = 0.0;  // without index
+  float wo_total_pages_scanned = 0.0;
+  float wo_totaltime = 0.0;
+  float wo_mem_used = 0.0;
+  float wo_read_bytes = 0.0;
+  float wo_write_bytes = 0.0;
+  float w_totaltime = 0.0;  // with index, without binary search, without blf
+  float w_index = 0.0;
+  float w_total_pages_scanned = 0.0;
+  float w_mem_used = 0.0;
+  float w_read_bytes = 0.0;
+  float w_write_bytes = 0.0;
+  float b_totaltime = 0.0;  // with binary search, without blf
+  float b_index = 0.0;
+  float b_total_pages_scanned = 0.0;
+  float b_mem_used = 0.0;
+  float b_read_bytes = 0.0;
+  float b_write_bytes = 0.0;
+  float w_blf_totaltime = 0.0;  // with blf, without page blf
+  float w_blf_index = 0.0;
+  float w_blf_total_pages_scanned = 0.0;
+  float w_blf_mem_used = 0.0;
+  float w_blf_read_bytes = 0.0;
+  float w_blf_write_bytes = 0.0;
+  float w_pageblf_totaltime = 0.0;  // with blf, with page blf
+  float w_pageblf_index = 0.0;
+  float w_pageblf_total_pages_scanned = 0.0;
+  float w_pageblf_mem_used = 0.0;
+  float w_pageblf_read_bytes = 0.0;
+  float w_pageblf_write_bytes = 0.0;
+  float blf_load_time = 0.0;
+  float index_load_time = 0.0;
+} trun;
+
+int parquet_writer(int argc, char** argv);
+
+void returnReaderwithType(std::shared_ptr<parquet::ColumnReader> cr,
+                          parquet::ColumnReader*& cr1);
+
+return_multiple getPredicate(std::shared_ptr<parquet::ColumnReader> cr,
+                             std::shared_ptr<parquet::RowGroupReader> rg, char* predicate,
+                             int& col_id, int64_t& page_index, int& PREDICATE_COL,
+                             int64_t& row_index, bool with_index, bool binary_search,
+                             int64_t& count_pages_scanned, int64_t& total_num_pages,
+                             int64_t& last_first_row, bool with_bloom_filter,
+                             bool with_page_bf, std::vector<int64_t>& unsorted_min_index,
+                             std::vector<int64_t>& unsorted_row_index);
+
+bool printVal(std::ofstream& runfile, std::shared_ptr<parquet::ColumnReader> column_reader,
+              parquet::ColumnReader* int64_reader, int ind, return_multiple vals,
+              int64_t& row_counter, bool checkpredicate, int equal_to);
+bool printRange(std::shared_ptr<parquet::ColumnReader> column_reader,
+                parquet::ColumnReader* int64_reader, int ind, return_multiple vals_min,
+                return_multiple vals_max, int64_t& row_counter);
+
+trun run_for_one_predicate(std::ofstream& runfile, int num_columns,
+                           std::shared_ptr<parquet::RowGroupReader>& row_group_reader,
+                           std::unique_ptr<parquet::ParquetFileReader>& parquet_reader,
+                           int col_id, char** argv, int predicate_index, int equal_to,
+                           bool binary_search, bool with_bloom_filter, bool with_page_bf);
+
+int64_t first_pass_for_predicate_only(std::ofstream& runfile,
+                                      std::shared_ptr<parquet::RowGroupReader> rg,
+                                      int predicate_column_number, int num_columns,
+                                      char* predicate, bool with_index, int equal_to,
+                                      bool binary_search, bool with_bloom_filter,
+                                      bool with_page_bf);
+
+int parquet_reader(int argc, char** argv);
+/**************Declaration END*********************************/
+
+int main(int argc, char** argv) {
+  parquet_reader(argc, argv);
+
+  std::cout << "Parquet Reading Complete" << std::endl;
+
+  return 0;
+}
+
+void getnumrows(char* num, int64_t& num_rows) {
+  int charlen = strlen(num);
+  int charin = 0;
+  while (num[charin] != '\0') {
+    num_rows +=
+        (num[charin] - '0') * ((int64_t)pow(10, charlen - charin - 1));
+    charin++;
+  }
+}
+
+int intlog(int num_rows) { return (int)log10(num_rows); }
+
+char* convertToCharptr(int64_t number, char*& predicate, int charlen) {
+  int i = 0;
+  for (; i < charlen; i++) {
+    predicate[charlen - i - 1] = number % 10 + '0';
+    number = number / 10;
+  }
+  predicate[i] = '\0';
+  return predicate;
+}
+
+int parquet_reader(int argc, char** argv) {
+  std::string PARQUET_FILENAME = argv[1];
+  try {
+    // Create a ParquetReader instance
+    std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
+        parquet::ParquetFileReader::OpenFile(PARQUET_FILENAME, false);
+
+    // Get the File MetaData
+    std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
+
+    int num_row_groups = file_metadata->num_row_groups();
+
+    // Get the number of Columns
+    int num_columns = file_metadata->num_columns();
+    // assert(num_columns == NUM_COLS);
+    std::ofstream runfile;
+    runfile.open(PARQUET_FILENAME + "-run-results.txt");  // + "-" + std::to_string(col_id);
+    if (argc == 3) {
+      // Point queries and range queries
+
+      int64_t num_rows = 0;
+      int num_queries = 1000;
+      int num_runs = 1;
+
+      // char* col_num = argv[3];
+      // std::stringstream ss(col_num);
+      // int col_id;
+      // ss >> col_id;
+
+      getnumrows(argv[2], num_rows);
+
+      // std::vector instead of a variable-length array (VLAs are non-standard C++)
+      std::vector<trun> times_by_type(num_columns);
+
+      runfile << time(NULL) << std::endl;
+      runfile << "############################## -- RUNNING POINT QUERIES -- ########################################" << std::endl;
+      for (int col_id = 0; col_id < num_columns; col_id++) {
+        times_by_type[col_id].w_index = 0.0;
+        times_by_type[col_id].wo_index = 0.0;
+        times_by_type[col_id].wo_totaltime = 0.0;
+        times_by_type[col_id].w_totaltime = 0.0;
+        times_by_type[col_id].b_totaltime = 0.0;
+        times_by_type[col_id].w_blf_totaltime = 0.0;
+        times_by_type[col_id].w_pageblf_totaltime = 0.0;
+        times_by_type[col_id].b_index = 0.0;
+        times_by_type[col_id].wo_total_pages_scanned = 0.0;
+        times_by_type[col_id].w_total_pages_scanned = 0.0;
+        times_by_type[col_id].b_total_pages_scanned = 0.0;
+        times_by_type[col_id].blf_load_time = 0.0;
+        times_by_type[col_id].index_load_time = 0.0;
+      }
+
+      // over each rowgroup
+      for (int r = 0; r < num_row_groups; r++) {
+        // for each column, run num_queries queries, num_runs times each
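+        // -----------------------------------------------------------------------
+        // Editorial sketch (not part of the original patch): getnumrows() and
+        // convertToCharptr() above hand-roll a fixed-width decimal round-trip.
+        // A self-contained standard-library equivalent, for reference:
+        //
+        //   #include <cstdint>
+        //   #include <cstdio>
+        //   #include <string>
+        //
+        //   int main() {
+        //     int64_t number = 42;
+        //     int width = 7;  // intlog(10000000) == 7
+        //     char buf[32];
+        //     std::snprintf(buf, sizeof(buf), "%0*lld", width,
+        //                   static_cast<long long>(number));  // -> "0000042"
+        //     int64_t parsed = std::stoll(buf);                // -> 42 again
+        //     return parsed == number ? 0 : 1;
+        //   }
+        // -----------------------------------------------------------------------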
+ for ( int col_id =0; col_id < num_columns; col_id++){ + //re-initialize column index, offset index and bloomfilters for each column + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + // for that column so many runs + for(int i=0; i < num_runs; i++){ + int predicateindex = 0; + char** predicates = (char**)malloc(sizeof(char*)*num_queries); + while ( predicateindex < num_queries ){ + // one query of the queries of the run + // sleep(1); + srand(time(NULL)); + char* predicate_val = (char*)malloc(intlog(num_rows)+1); + convertToCharptr(rand()%num_rows,predicate_val,intlog(num_rows)); + predicates[predicateindex] = predicate_val; + + runfile << " run number " << i << "-- Query number " << predicateindex << "-- col_num " << col_id << " predicate: " << predicates[predicateindex] << std::endl; + trun avgtime = run_for_one_predicate(runfile, num_columns,row_group_reader,parquet_reader,col_id,predicates,predicateindex,0,true,true,true); + + times_by_type[col_id].wo_totaltime += avgtime.wo_totaltime; + times_by_type[col_id].w_totaltime += avgtime.w_totaltime; + times_by_type[col_id].b_totaltime += avgtime.b_totaltime; + times_by_type[col_id].w_blf_totaltime += avgtime.w_blf_totaltime; + // times_by_type[col_id].w_pageblf_totaltime = avgtime.w_pageblf_totaltime; + + times_by_type[col_id].wo_total_pages_scanned += avgtime.wo_total_pages_scanned; + times_by_type[col_id].w_total_pages_scanned += avgtime.w_total_pages_scanned; + times_by_type[col_id].b_total_pages_scanned += avgtime.b_total_pages_scanned; + times_by_type[col_id].w_blf_total_pages_scanned += avgtime.w_blf_total_pages_scanned; + // times_by_type[col_id].w_pageblf_total_pages_scanned += avgtime.w_pageblf_total_pages_scanned; + + times_by_type[col_id].wo_mem_used += avgtime.wo_mem_used; + times_by_type[col_id].w_mem_used += avgtime.w_mem_used; + times_by_type[col_id].b_mem_used += avgtime.b_mem_used; + times_by_type[col_id].w_blf_mem_used += avgtime.w_blf_mem_used; + // times_by_type[col_id].w_pageblf_mem_used += avgtime.w_pageblf_mem_used; + + times_by_type[col_id].wo_read_bytes += avgtime.wo_read_bytes; + times_by_type[col_id].w_read_bytes += avgtime.w_read_bytes; + times_by_type[col_id].b_read_bytes += avgtime.b_read_bytes; + times_by_type[col_id].w_blf_read_bytes += avgtime.w_blf_read_bytes; + // times_by_type[col_id].w_pageblf_read_bytes += avgtime.w_pageblf_read_bytes; + + times_by_type[col_id].wo_write_bytes += avgtime.wo_write_bytes; + times_by_type[col_id].w_write_bytes += avgtime.w_write_bytes; + times_by_type[col_id].b_write_bytes += avgtime.b_write_bytes; + times_by_type[col_id].w_blf_write_bytes += avgtime.w_blf_write_bytes; + // times_by_type[col_id].w_pageblf_write_bytes += avgtime.w_pageblf_write_bytes; + times_by_type[col_id].blf_load_time = row_group_reader->GetBLFLoadTime(); + times_by_type[col_id].index_load_time = row_group_reader->GetIndexLoadTime(); + + predicateindex++; + } + } + + runfile << "############################### -- POINT QUERY RUN TIME RESULTS FINAL --" << col_id << "-- ################################" << std::endl; + + runfile<< "|----------------------------col_num " << col_id << "----------------------------|" << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index " + << (times_by_type[col_id].wo_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].wo_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].wo_mem_used << 
std::endl + << " avg bytes read " << times_by_type[col_id].wo_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].wo_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index " + << (times_by_type[col_id].w_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with bloomfilter " + << (times_by_type[col_id].b_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].b_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].b_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].b_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].b_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index with bloomfilter " + << (times_by_type[col_id].w_blf_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_blf_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_blf_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_blf_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_blf_write_bytes + << std::endl; + + // runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with binary with bloomfilter " + // << (times_by_type[col_id].w_pageblf_totaltime/(num_runs*num_queries)) << std::endl + // << " avg num of datapage indices scanned " << (times_by_type[col_id].w_pageblf_total_pages_scanned/(num_runs*num_queries)) << std::endl + // << " avg memory used in kB " << times_by_type[col_id].w_pageblf_mem_used << std::endl + // << " avg bytes read " << times_by_type[col_id].w_pageblf_read_bytes << std::endl + // << " avg bytes written " << times_by_type[col_id].w_pageblf_write_bytes + // << std::endl; + + runfile<< "|----------------------------------------------------------------------------------|" << std::endl; + + } + } + + runfile << "############################### -- POINT QUERY RUN TIME RESULTS FINAL ################################" << std::endl; + for ( int col_id =0; col_id < num_columns; col_id++){ + runfile<< "|----------------------------col_num " << col_id << "----------------------------|" << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index " + << (times_by_type[col_id].wo_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].wo_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].wo_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].wo_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].wo_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index " + << (times_by_type[col_id].w_totaltime/(num_runs*num_queries)) << std::endl + << " avg 
num of datapage indices scanned " << (times_by_type[col_id].w_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_write_bytes + << " index load time " << times_by_type[col_id].index_load_time + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with bloomfilter " + << (times_by_type[col_id].b_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].b_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].b_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].b_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].b_write_bytes + << " index load time " << times_by_type[col_id].index_load_time + << " blf load time " << times_by_type[col_id].blf_load_time + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index with bloomfilter " + << (times_by_type[col_id].w_blf_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_blf_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_blf_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_blf_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_blf_write_bytes + << " blf load time " << times_by_type[col_id].blf_load_time + << std::endl; + + // runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with binary with bloomfilter " + // << (times_by_type[col_id].w_pageblf_totaltime/(num_runs*num_queries)) << std::endl + // << " avg num of datapage indices scanned " << (times_by_type[col_id].w_pageblf_total_pages_scanned/(num_runs*num_queries)) << std::endl + // << " avg memory used in kB " << times_by_type[col_id].w_pageblf_mem_used << std::endl + // << " avg bytes read " << times_by_type[col_id].w_pageblf_read_bytes << std::endl + // << " avg bytes written " << times_by_type[col_id].w_pageblf_write_bytes + // << std::endl; + + runfile<< "|----------------------------------------------------------------------------------|" << std::endl; + + } + runfile << "#######################################################################################################" << std::endl; + } + + if ( argc == 4 ) { + char *col_num = argv[2]; + std::stringstream ss(col_num); + int colid; + ss >> colid; + for ( int r = 0; r < num_row_groups; r++) { + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,3,0,true,true,true); + } + } + + + if ( argc == 5 ){ + char *col_num = argv[2]; + std::stringstream ss(col_num); + int colid; + ss >> colid; + for ( int r = 0; r < num_row_groups; r++) { + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,3,1,true,true,true); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,4,-1,true,true,true); + } + } + runfile.close(); + return 0; + } catch (const std::exception& e) { + std::cerr << "Parquet read error: " << e.what() << std::endl; + return -1; + } + +} + +trun 
run_for_one_predicate(std::ofstream& runfile,int num_columns,std::shared_ptr& row_group_reader, std::unique_ptr& parquet_reader, int colid,char** argv,int predicate_index, + int equal_to, bool binary_search, bool with_bloom_filter, bool with_page_bf) { + + + trun avgtime; + int64_t prev_num_bytes_r = 0; + int64_t prev_num_bytes_rc = 0; + int64_t prev_num_bytes_w = 0; + int64_t prev_num_bytes_wc = 0; + int64_t curr_num_bytes_r = 0; + int64_t curr_num_bytes_rc = 0; + int64_t curr_num_bytes_w = 0; + int64_t curr_num_bytes_wc = 0; + int64_t prev_mem_used = 0; + int64_t curr_mem_used = 0; + // Iterate over all the RowGroups in the file + //for (int r = 0; r < num_row_groups; ++r) + { + + + char *predicate_val = argv[predicate_index]; + + int col_id = colid; + // Get the RowGroup Reader + + clock_t start_time,end_time; + float total_time= 0.0; + int num_runs = 1; + + float total_pages_scanned = 0.0; + + runfile << " Column ID: " << col_id << "| Column Type: " << row_group_reader->Column(col_id)->type() << std::endl; + + /********FIRST PASS WITHOUT INDEX***************/ + /*total_time = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ########################################################################## " << std::endl; + runfile << "\n time for predicate one pass without index: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + total_pages_scanned += first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,false,equal_to,!binary_search,!with_bloom_filter, !with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + avgtime.wo_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.wo_totaltime = total_time; + avgtime.wo_mem_used = curr_mem_used-prev_mem_used; + avgtime.wo_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.wo_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + + /**************FIRST PASS WITH INDEX WITHOUT BINARY WITHOUT BF PAGE BF*****************/ + + /*total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate one pass without bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,true,equal_to, !binary_search, !with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n index load time: " << row_group_reader->GetIndexLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.w_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.w_totaltime = total_time; + avgtime.w_mem_used = curr_mem_used-prev_mem_used; + avgtime.w_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.w_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + /**************FIRST PASS WITH INDEX WITH BINARY WITHOUT BF PAGE BF*****************/ + + /*total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate one pass with bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,true,equal_to, !binary_search, with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n index load time: " << row_group_reader->GetIndexLoadTime() << std::endl; + runfile << "\n blf load time: " << row_group_reader->GetBLFLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.b_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.b_totaltime = total_time; + avgtime.b_mem_used = curr_mem_used-prev_mem_used; + avgtime.b_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.b_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + /**************FIRST PASS WITH INDEX WITH BINARY WITH BF WITHOUT PAGE BF*****************/ + + total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate without index with bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,false,equal_to, !binary_search, with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n blf load time: " << row_group_reader->GetBLFLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.w_blf_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.w_blf_totaltime = total_time; + avgtime.w_blf_mem_used = curr_mem_used-prev_mem_used; + avgtime.w_blf_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.w_blf_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ########################################################################## " << std::endl; + + /***********FIRST PASS END **********/ + + /***********Second PASS *************/ + // TODO // + + /***********************************/ + + } + return avgtime; +} + + +int64_t first_pass_for_predicate_only(std::ofstream& runfile, std::shared_ptr row_group_reader,int col_id, int num_columns, char* predicate_val,bool with_index, + int equal_to, bool binary_search, bool with_bloom_filter, bool with_page_bf) { + + int64_t row_index = 0; + int64_t count_pages_scanned = 0, total_num_pages = 0, last_first_row = 0; + + std::vector col_row_counts(num_columns, 0); + + // assert(row_group_reader->metadata()->total_byte_size() < ROW_GROUP_SIZE); + + // int16_t definition_level; + // int16_t repetition_level; + std::shared_ptr column_reader; + + + // std::cout<< "test arg v" < unsorted_page_index; + std::vector unsorted_row_index; + + char c; + // int64_t predicate; + // sscanf(argv[2], "%" SCNd64 "%c", &predicate, &c); + + // int PREDICATE_COL; + // sscanf(argv[2], "%d" "%c", &PREDICATE_COL, &c); + // Get the Column Reader for the Int64 column + std::shared_ptr predicate_column_reader = row_group_reader->Column(col_id); + + + + // std::cout << "given predicate: " << predicate << " type of predicate: " << typeid(predicate).name() << std::endl; + + std::shared_ptr column_reader_with_index; + + parquet::ColumnReader* generic_reader; + + int PREDICATE_COL = col_id; + return_multiple vals = getPredicate(predicate_column_reader,row_group_reader,predicate_val,col_id,page_index,PREDICATE_COL,row_index,with_index,binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_page_index, unsorted_row_index); + column_reader_with_index = vals.column_reader; + + //SAMPLE row group reader call in the comment below + // row_group_reader->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,predicate_column_reader->type()); + + returnReaderwithType(column_reader_with_index,generic_reader); + + int counter = 0; + int ind = 0; + int64_t row_counter = 0; + + if (unsorted_row_index.size()==0){ + if ( row_index != -1 ) { + if(with_index){ + ind = row_index; + row_counter = 0; + generic_reader->Skip(row_index); + do{ ind++; + if((printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to))) + break; + }while((generic_reader->HasNext())); + } + else{ + while (generic_reader->HasNext()) { + ind++; + count_pages_scanned++; + if(printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to)) + break; + // int64_t expected_value = col_row_counts[col_id]; + // assert(value == expected_value); + col_row_counts[col_id]++; + } + } + // Read all the rows in the column + runfile << "| page index: " << page_index << "| number of rows loaded: " << ind << + "| total number of pages: " << total_num_pages << "| last page first row index: " << last_first_row << std::endl; + + } + else{ + runfile << "non-member query" << std::endl; + } + } + else{ + ind = 0; + int index_list_count = 0; + bool found = false; + for(int64_t row_index: unsorted_row_index) { + row_counter = 0; + 
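+        // Editorial note: the pattern below is "skip, then scan": Skip() moves the
+        // reader to the first row of a candidate page taken from the column index,
+        // and the do/while reads one value at a time until the predicate matches.
+        // Condensed shape of the loop (names from this file, not a library API):
+        //
+        //   for (int64_t row : unsorted_row_index) {      // candidate page starts
+        //     generic_reader->Skip(row);                  // seek to the page
+        //     do {
+        //       if (printVal(...)) { found = true; break; }  // predicate hit
+        //     } while (generic_reader->HasNext());
+        //     if (found) break;                           // stop at first match
+        //   }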
generic_reader->Skip(row_index); + do{ ind++; + if((printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to))){ + found = true; + break; + } + + }while((generic_reader->HasNext())); + // Read all the rows in the column + runfile << "| page index: " << unsorted_page_index[index_list_count] << "| number of rows loaded: " << ind << + "| total number of pages: " << total_num_pages << "| last page first row index: " << last_first_row << std::endl; + index_list_count++; + if (found) break; + } + if ( ind == (int)unsorted_row_index.size()) + runfile << "non-member query" << std::endl; + } + + return count_pages_scanned; +} + +return_multiple getPredicate(std::shared_ptr cr,std::shared_ptr rg,char* predicate_val, + int& col_id,int64_t& page_index,int& PREDICATE_COL,int64_t& row_index, bool with_index, + bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index){ + const int CHAR_LEN = 10000000; + + return_multiple vals; + std::stringstream ss(predicate_val); + switch(cr->type()){ + case Type::BOOLEAN:{ + bool b; + + ss >> std::boolalpha >> b; + void * predicate = static_cast(&b); + + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.b = b; + return vals; + } + case Type::INT32:{ + int32_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.p = val; + return vals; + } + case Type::INT64:{ + int64_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.r = val; + return vals; + } + case Type::INT96:{ + uint32_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.e = val; + return vals; + } + case Type::FLOAT:{ + float val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.d = val; + return vals; + } + case Type::DOUBLE:{ + double val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.i = val; + return vals; + } + case 
Type::BYTE_ARRAY: {
+      char* val = predicate_val;
+
+      void* predicate = static_cast<void*>(val);
+      vals.column_reader =
+          rg->ColumnWithIndex(col_id, predicate, page_index, PREDICATE_COL, row_index,
+                              cr->type(), with_index, binary_search, count_pages_scanned,
+                              total_num_pages, last_first_row, with_bloom_filter,
+                              with_page_bf, unsorted_min_index, unsorted_row_index);
+      vals.c = val;
+      return vals;
+    }
+    case Type::FIXED_LEN_BYTE_ARRAY: {
+      char* val = predicate_val;
+
+      void* predicate = static_cast<void*>(val);
+      vals.column_reader =
+          rg->ColumnWithIndex(col_id, predicate, page_index, PREDICATE_COL, row_index,
+                              cr->type(), with_index, binary_search, count_pages_scanned,
+                              total_num_pages, last_first_row, with_bloom_filter,
+                              with_page_bf, unsorted_min_index, unsorted_row_index);
+      vals.a = val;
+      return vals;
+    }
+    default: {
+      std::cout << "type not supported" << std::endl;
+      vals.b = false;  // was NULL; b is a bool
+      vals.a = nullptr;
+      vals.c = nullptr;
+      vals.t = nullptr;
+      return vals;
+    }
+  }
+}
+
+void returnReaderwithType(std::shared_ptr<parquet::ColumnReader> column_reader,
+                          parquet::ColumnReader*& int64_reader) {
+  // The cast targets below were lost in the patch text and are reconstructed
+  // from the parquet typed-reader classes.
+  switch (column_reader->type()) {
+    case Type::BOOLEAN:
+      int64_reader = static_cast<parquet::BoolReader*>(column_reader.get());
+      break;
+    case Type::INT32:
+      int64_reader = static_cast<parquet::Int32Reader*>(column_reader.get());
+      break;
+    case Type::INT64:
+      int64_reader = static_cast<parquet::Int64Reader*>(column_reader.get());
+      break;
+    case Type::INT96:
+      int64_reader = static_cast<parquet::Int96Reader*>(column_reader.get());
+      break;
+    case Type::FLOAT:
+      int64_reader = static_cast<parquet::FloatReader*>(column_reader.get());
+      break;
+    case Type::DOUBLE:
+      int64_reader = static_cast<parquet::DoubleReader*>(column_reader.get());
+      break;
+    case Type::BYTE_ARRAY:
+      int64_reader = static_cast<parquet::ByteArrayReader*>(column_reader.get());
+      break;
+    case Type::FIXED_LEN_BYTE_ARRAY:
+      int64_reader = static_cast<parquet::FixedLenByteArrayReader*>(column_reader.get());
+      break;
+    default:
+      parquet::ParquetException::NYI("type reader not implemented");
+  }
+}
+
+bool printVal(std::ofstream& runfile, std::shared_ptr<parquet::ColumnReader> column_reader,
+              parquet::ColumnReader* int64_reader, int ind, return_multiple vals,
+              int64_t& row_counter, bool checkpredicate = false, int equal_to = 0) {
+  int64_t values_read = 0;
+  switch (column_reader->type()) {
+    case Type::BOOLEAN: {
+      bool test;
+      bool predicate = vals.b;
+      int64_reader->callReadBatch(1, &test, &values_read);
+      row_counter = ind;
+
+      if (equal_to == 0 && checkpredicate && test == predicate) {
+        runfile << "with predicate row number: " << row_counter << " " << test << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && test < predicate) {
+        // keep scanning for a "<" range query
+      } else if (equal_to == 1 && checkpredicate && test > predicate) {
+        // keep scanning for a ">" range query
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::INT32: {
+      int32_t val;
+      int32_t predicate = vals.p;
+      int64_reader->callReadBatch(1, &val, &values_read);
+      row_counter = ind;
+
+      if (equal_to == 0 && checkpredicate && val == predicate) {
+        runfile << "with predicate row number: " << row_counter << " " << val << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && val < predicate) {
+      } else if (equal_to == 1 && checkpredicate && val > predicate) {
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::INT64: {
+      int64_t value;
+      int64_t predicate = vals.r;
+      // Read
one value at a time. The number of rows read is returned. values_read + // contains the number of non-null rows + int64_reader->callReadBatch(1,&value,&values_read); + + // Ensure only one value is read + //assert(rows_read == 1); + // There are no NULL values in the rows written + // assert(values_read == 1); + // Verify the value written + if ( equal_to == 0 && checkpredicate && value == predicate) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << value << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && value < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && value > predicate ) { + + } + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << value << "\n"; + return false; + } + break; + } + case Type::INT96: + { + uint32_t val; + uint32_t predicate = vals.e; + int64_reader->callReadBatch(1,&val,&values_read); + row_counter = ind; + + if ( equal_to == 0 && checkpredicate && val == predicate) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::FLOAT: + { + float val; + float predicate = vals.d; + float error_factor = 9*pow(10,15); + int64_reader->callReadBatch(1,&val,&values_read); + if ( checkpredicate && fabs(val-predicate)<=std::numeric_limits::epsilon()*error_factor) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::DOUBLE: + { + double val; + double predicate = vals.i; + int64_reader->callReadBatch(1,&val,&values_read); + double error_factor = 9*pow(10,15); + + if ( equal_to == 0 && checkpredicate && fabs(val-predicate)<=std::numeric_limits::epsilon()*error_factor) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::BYTE_ARRAY: + { + parquet::ByteArray str; + char* predicate = vals.c; + uint32_t FIXED_LENGTH = 124; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(predicate));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(predicate)); i < FIXED_LENGTH;i++) dest[i] = predicate[i-(FIXED_LENGTH-strlen(predicate))]; + dest[FIXED_LENGTH] = '\0'; + std::string pstring(dest); + int64_reader->callReadBatch(1,&str,&values_read); + std::string result_value = parquet::ByteArrayToString(str); + 
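+      // Editorial note: `char dest[FIXED_LENGTH]` above is terminated with
+      // `dest[FIXED_LENGTH] = '\0'`, which writes one byte past the end of the
+      // array. A bounds-safe way to build the zero-padded comparison string,
+      // using only the standard library, would be:
+      //
+      //   std::string pstring =
+      //       std::string(FIXED_LENGTH - strlen(predicate), '0') + predicate;
+      //
+      // std::string manages its own storage, so no manual terminator is needed.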
// std::string result(result_value.substr(result_value.length()-strlen(predicate), strlen(predicate)));
+      row_counter = ind;
+      if (equal_to == 0 && checkpredicate && result_value.compare(pstring) == 0) {
+        runfile << "with predicate row number: " << row_counter << " " << result_value << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && result_value.compare(pstring) < 0) {
+        // keep scanning for a "<" range query
+      } else if (equal_to == 1 && checkpredicate && result_value.compare(pstring) > 0) {
+        // keep scanning for a ">" range query
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::FIXED_LEN_BYTE_ARRAY: {
+      parquet::FLBA str;
+      char* predicate = vals.a;
+      int64_reader->callReadBatch(1, &str, &values_read);
+      // Note: sizeof(str) is the size of the FLBA struct, not the declared
+      // fixed length of the column; the declared length should come from the
+      // column descriptor.
+      std::string result_value = parquet::FixedLenByteArrayToString(str, sizeof(str));
+      std::string result(result_value.substr(result_value.length() - strlen(predicate), strlen(predicate)));
+      row_counter = ind;
+      if (equal_to == 0 && checkpredicate && strcmp(result.c_str(), predicate) == 0) {
+        runfile << "with predicate row number: " << row_counter << " " << result << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && strcmp(result.c_str(), predicate) < 0) {
+      } else if (equal_to == 1 && checkpredicate && strcmp(result.c_str(), predicate) > 0) {
+      } else {
+        return false;
+      }
+      break;
+    }
+    default: {
+      parquet::ParquetException::NYI("type reader not implemented");
+      return false;
+    }
+  }
+  return false;
+}
diff --git a/cpp/examples/parquet/low-level-api/reader-writer.cc b/cpp/examples/parquet/low-level-api/reader-writer.cc
index 82232ddbe7b..152738f0ed5 100644
--- a/cpp/examples/parquet/low-level-api/reader-writer.cc
+++ b/cpp/examples/parquet/low-level-api/reader-writer.cc
@@ -42,8 +42,8 @@
  * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
  **/
 
-constexpr int NUM_ROWS_PER_ROW_GROUP = 500;
-const char PARQUET_FILENAME[] = "parquet_cpp_example.parquet";
+constexpr int NUM_ROWS_PER_ROW_GROUP = 15000000;
+const char PARQUET_FILENAME[] = "parquet_cpp_example_15M.parquet";
 
 int main(int argc, char** argv) {
   /**********************************************************************************
@@ -57,12 +57,12 @@ int main(int argc, char** argv) {
   using FileClass = ::arrow::io::FileOutputStream;
   std::shared_ptr<FileClass> out_file;
   PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME));
 
   // Setup the parquet schema
   std::shared_ptr<GroupNode> schema = SetupSchema();
 
   // Add writer properties
   parquet::WriterProperties::Builder builder;
-  builder.compression(parquet::Compression::SNAPPY);
+  builder.compression(parquet::Compression::UNCOMPRESSED);
   std::shared_ptr<parquet::WriterProperties> props = builder.build();
 
@@ -73,6 +73,13 @@ int main(int argc, char** argv) {
   // Append a RowGroup with a specific number of rows.
parquet::RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + // // Write the Bool column + // parquet::BoolWriter* bool_writer = + // static_cast(rg_writer->NextColumn()); + // for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + // bool value = ((i % 2) == 0) ? true : false; + // bool_writer->WriteBatch(1, nullptr, nullptr, &value); + // } // Write the Bool column parquet::BoolWriter* bool_writer = static_cast(rg_writer->NextColumn()); diff --git a/cpp/examples/parquet/low-level-api/reader_writer_forindex.h b/cpp/examples/parquet/low-level-api/reader_writer_forindex.h new file mode 100644 index 00000000000..7e149d15753 --- /dev/null +++ b/cpp/examples/parquet/low-level-api/reader_writer_forindex.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include + +using parquet::ConvertedType; +using parquet::Repetition; +using parquet::Type; +using parquet::schema::GroupNode; +using parquet::schema::PrimitiveNode; + +constexpr int FIXED_LENGTH = 10; + +static std::shared_ptr SetupSchema() { + parquet::schema::NodeVector fields; + + + // Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED, + // logical type:TIME_MILLIS + fields.push_back(PrimitiveNode::Make("int32_field1", Repetition::REQUIRED, Type::INT32,ConvertedType::NONE)); + + // Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED + fields.push_back(PrimitiveNode::Make("int64_field1", Repetition::REQUIRED, Type::INT64,ConvertedType::NONE)); + + fields.push_back(PrimitiveNode::Make("float_field1", Repetition::REQUIRED, Type::FLOAT,ConvertedType::NONE)); + + fields.push_back(PrimitiveNode::Make("double_field1", Repetition::REQUIRED, Type::DOUBLE,ConvertedType::NONE)); + + // Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL + fields.push_back(PrimitiveNode::Make("ba_field1", Repetition::OPTIONAL, Type::BYTE_ARRAY,ConvertedType::NONE)); + + // Create a GroupNode named 'schema' using the primitive nodes defined above + // This GroupNode is the root node of the schema tree + return std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); +} diff --git a/cpp/examples/parquet/low-level-api/writer-with-index.cc b/cpp/examples/parquet/low-level-api/writer-with-index.cc new file mode 100644 index 00000000000..56eb9cb6202 --- /dev/null +++ b/cpp/examples/parquet/low-level-api/writer-with-index.cc @@ -0,0 +1,753 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "parquet/column_reader.h" +#include "parquet/column_scanner.h" +#include "parquet/deprecated_io.h" +#include "parquet/exception.h" +#include "parquet/metadata.h" +#include "parquet/platform.h" +#include "parquet/properties.h" +#include "parquet/schema.h" +#include "parquet/types.h" + +/* + * This example illustrates PARQUET-1404 for page level skipping in + * writing and reading Parquet Files in C++ and serves as a + * reference to the API for reader and writer enhanced with Column Index and Offset Index + * The file contains all the physical data types supported by Parquet. + * This example uses the RowGroupWriter API that supports writing RowGroups based on a + *certain size + **/ + +/* Parquet is a structured columnar file format + * Parquet File = "Parquet data" + "Parquet Metadata" + * "Parquet data" is simply a vector of RowGroups. Each RowGroup is a batch of rows in a + * columnar layout + * "Parquet Metadata" contains the "file schema" and attributes of the RowGroups and their + * Columns + * "file schema" is a tree where each node is either a primitive type (leaf nodes) or a + * complex (nested) type (internal nodes) + * For specific details, please refer the format here: + * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md + **/ + + +/********************************************************************************* + PARQUET WRITER WITH PAGE SKIPPING EXAMPLE +**********************************************************************************/ + +void writecolswithindexbf(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp,int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. 
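+  // Editorial note: the "number of bytes" printed per column in this function is
+  // the bloom filter size amortized over the row count. For a classic bloom
+  // filter, the textbook sizing for false-positive probability p is (standard
+  // formula, not necessarily the exact rule InitBloomFilter() applies):
+  //
+  //   bits_per_value = -ln(p) / (ln 2)^2   // ~9.6 bits per value for p = 0.01
+  //
+  //   #include <cmath>
+  //   double bits_per_value(double p) {
+  //     return -std::log(p) / (std::log(2.0) * std::log(2.0));
+  //   }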
+ parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writecolswithindexbfunsorted(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp, int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + srand(time(NULL)); + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = rand()%NUM_ROWS_PER_ROW_GROUP; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + srand(time(NULL)); + // Write the Int64 column. Each row has not[repeats twice]. 
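One thing to watch in this unsorted variant: `srand(time(NULL))` is re-seeded before every column, and `time` has one-second resolution, so columns written within the same second draw the identical pseudo-random sequence. A sketch of a single-seed alternative using C++11 `<random>` (the helper name is illustrative):

#include <random>

// Seed one generator per process instead of re-seeding per column.
inline int64_t RandomRowValue(int64_t upper_bound) {
  static std::mt19937_64 gen{std::random_device{}()};
  std::uniform_int_distribution<int64_t> dist(0, upper_bound - 1);
  return dist(gen);
}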
+ parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = rand()%NUM_ROWS_PER_ROW_GROUP; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(rand()%NUM_ROWS_PER_ROW_GROUP) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = rand()%NUM_ROWS_PER_ROW_GROUP * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the ByteArray column. Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes ByteArray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetwithindexbf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both 
definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. + parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolswithindexbf(NUM_ROWS/num_rg,rg_writer,fpp,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(true,true); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writeparquetwithindexbfunsorted(int NUM_ROWS, int num_rg,float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_unsorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
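Note that `NUM_ROWS / num_rg` truncates, so when `num_rg` does not evenly divide `NUM_ROWS` the file silently ends up short of `NUM_ROWS` rows. A sketch of a loop that gives the remainder to the last row group, using the same API as the code that follows:

int rows_per_rg = NUM_ROWS / num_rg;
for (int i = 0; i < num_rg; i++) {
  // The last row group absorbs whatever the integer division dropped.
  int rows_this_rg =
      (i == num_rg - 1) ? NUM_ROWS - rows_per_rg * (num_rg - 1) : rows_per_rg;
  parquet::RowGroupWriter* rg = file_writer->AppendRowGroup(rows_this_rg);
  writecolswithindexbf(rows_this_rg, rg, fpp, 1, 1, 1.1f, 1.1111111, 124);
}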
+    parquet::RowGroupWriter* rg_writer;
+    for (int i = 0; i < num_rg; i++) {
+      rg_writer = file_writer->AppendRowGroup(NUM_ROWS / num_rg);
+      writecolswithindexbfunsorted(NUM_ROWS / num_rg, rg_writer, fpp, 1, 1, 1.1f,
+                                   1.1111111, 124);
+    }
+
+    // Close the ParquetFileWriter
+    file_writer->CloseWithIndex(true, true);
+
+    // Write the bytes to file
+    DCHECK(out_file->Close().ok());
+  } catch (const std::exception& e) {
+    std::cerr << "Parquet write error: " << e.what() << std::endl;
+    // return -1;
+  }
+}
+
+void writecolswithoutindexbf(int NUM_ROWS_PER_ROW_GROUP,
+                             parquet::RowGroupWriter*& rg_writer, int32_t int32factor,
+                             int64_t int64factor, float float_factor,
+                             double double_factor, int FIXED_LENGTH) {
+  uint32_t num_bytes = 0;
+  // rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP, num_bytes);
+
+  // Write the Int32 column
+  parquet::Int32Writer* int32_writer =
+      static_cast<parquet::Int32Writer*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    int32_t value = i * int32factor;
+    int32_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes int32 " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Int64 column.
+  parquet::Int64Writer* int64_writer =
+      static_cast<parquet::Int64Writer*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    int64_t value = i * int64factor;
+    int64_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes int64 " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Float column
+  parquet::FloatWriter* float_writer =
+      static_cast<parquet::FloatWriter*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    float value = static_cast<float>(i) * float_factor;
+    float_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes float " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Double column
+  parquet::DoubleWriter* double_writer =
+      static_cast<parquet::DoubleWriter*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    double value = i * double_factor;
+    double_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes double " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the ByteArray column.
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumn()); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetwithoutindexbf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_WOIBF-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+ parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolswithoutindexbf(NUM_ROWS/num_rg,rg_writer,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(false,false); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writecolsonlyindex(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp, int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + //rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. + parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. 
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetonlyindex(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_only-index-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+ parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolsonlyindex(NUM_ROWS/num_rg,rg_writer,fpp,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(true,false); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writecolsonlybf(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp,int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. + parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. 
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetonlybf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_only-bf-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+    parquet::RowGroupWriter* rg_writer;
+    for (int i = 0; i < num_rg; i++) {
+      rg_writer = file_writer->AppendRowGroup(NUM_ROWS / num_rg);
+      writecolsonlybf(NUM_ROWS / num_rg, rg_writer, fpp, 1, 1, 1.1f, 1.1111111, 124);
+    }
+    // Close the ParquetFileWriter
+    file_writer->CloseWithIndex(false, true);
+
+    // Write the bytes to file
+    DCHECK(out_file->Close().ok());
+  } catch (const std::exception& e) {
+    std::cerr << "Parquet write error: " << e.what() << std::endl;
+    // return -1;
+  }
+}
+
+int main(int argc, char** argv) {
+  if (argc == 4) {
+    int NUM_ROWS = atoi(argv[1]);
+    int num_rg = atoi(argv[2]);
+    float fpp = static_cast<float>(atof(argv[3]));
+    // writeparquetwithoutindexbf(NUM_ROWS, num_rg, fpp);
+    // writeparquetonlyindex(NUM_ROWS, num_rg, fpp);
+    // writeparquetonlybf(NUM_ROWS, num_rg, fpp);
+    writeparquetwithindexbfunsorted(NUM_ROWS, num_rg, fpp);
+    std::cout << "Parquet Writing Complete" << std::endl;
+  } else {
+    std::cerr << "Usage: parquet-writer-with-pageindex <num_rows> <num_row_groups> <fpp>"
+              << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
new file mode 100644
index 00000000000..58072b61381
--- /dev/null
+++ b/cpp/src/arrow/util/io-util.cc
@@ -0,0 +1,983 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
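A note on the block that opens this file: `_FILE_OFFSET_BITS` only takes effect if it is defined before the first system header is included, which is why it precedes every `#include` below. On a 32-bit glibc build, a misordered include silently reverts `off_t` to 32 bits; a compile-time guard such as this (POSIX-only, illustrative, not part of the patch) would catch that:

#include <sys/types.h>

// Fails the build if large-file support did not take effect.
static_assert(sizeof(off_t) == 8, "io-util.cc requires 64-bit file offsets");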
+ +// Ensure 64-bit off_t for platforms where it matters +#ifdef _FILE_OFFSET_BITS +#undef _FILE_OFFSET_BITS +#endif + +#define _FILE_OFFSET_BITS 64 + +#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include // IWYU pragma: keep + +// Defines that don't exist in MinGW +#if defined(__MINGW32__) +#define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR +#elif defined(_MSC_VER) // Visual Studio + +#else // gcc / clang on POSIX platforms +#define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH +#endif + +#include + +// ---------------------------------------------------------------------- +// file compatibility stuff + +#if defined(_WIN32) +#include +#include +#endif + +#ifdef _WIN32 // Windows +#include "arrow/io/mman.h" +#undef Realloc +#undef Free +#else // POSIX-like platforms +#include +#include +#endif + +// define max read/write count +#if defined(_WIN32) +#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX +#else + +#ifdef __APPLE__ +// due to macOS bug, we need to set read/write max +#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX +#else +// see notes on Linux read/write manpage +#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000 +#endif + +#endif + +#include "arrow/buffer.h" +#include "arrow/util/io-util.h" +#include "arrow/util/logging.h" + +// For filename conversion +#if defined(_WIN32) +#include "arrow/util/utf8.h" +#endif + +namespace arrow { +namespace io { + +// +// StdoutStream implementation +// + +StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); } + +Status StdoutStream::Close() { return Status::OK(); } + +bool StdoutStream::closed() const { return false; } + +Status StdoutStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StdoutStream::Write(const void* data, int64_t nbytes) { + pos_ += nbytes; + std::cout.write(reinterpret_cast(data), nbytes); + return Status::OK(); +} + +// +// StderrStream implementation +// + +StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); } + +Status StderrStream::Close() { return Status::OK(); } + +bool StderrStream::closed() const { return false; } + +Status StderrStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StderrStream::Write(const void* data, int64_t nbytes) { + pos_ += nbytes; + std::cerr.write(reinterpret_cast(data), nbytes); + return Status::OK(); +} + +// +// StdinStream implementation +// + +StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); } + +Status StdinStream::Close() { return Status::OK(); } + +bool StdinStream::closed() const { return false; } + +Status StdinStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StdinStream::Read(int64_t nbytes, int64_t* bytes_read, void* out) { + std::cin.read(reinterpret_cast(out), nbytes); + if (std::cin) { + *bytes_read = nbytes; + pos_ += nbytes; + } else { + *bytes_read = 0; + } + return Status::OK(); +} + +Status StdinStream::Read(int64_t nbytes, std::shared_ptr* out) { + std::shared_ptr buffer; + ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer)); + int64_t bytes_read; + ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data())); + ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); + buffer->ZeroPadding(); + *out = buffer; + return Status::OK(); +} + +} // namespace io + +namespace internal { + +namespace bfs = ::boost::filesystem; + +namespace { + 
+Status StringToNative(const std::string& s, NativePathString* out) { +#if _WIN32 + std::wstring ws; + RETURN_NOT_OK(::arrow::util::UTF8ToWideString(s, &ws)); + *out = std::move(ws); +#else + *out = s; +#endif + return Status::OK(); +} + +} // namespace + +#define BOOST_FILESYSTEM_TRY try { +#define BOOST_FILESYSTEM_CATCH \ + } \ + catch (bfs::filesystem_error & _err) { \ + return ToStatus(_err); \ + } + +// NOTE: catching filesystem_error gives more context than system::error_code +// (it includes the file path(s) in the error message) + +static Status ToStatus(const bfs::filesystem_error& err) { + return Status::IOError(err.what()); +} + +static std::string MakeRandomName(int num_chars) { + static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz"; + std::random_device gen; + std::uniform_int_distribution dist(0, static_cast(chars.length() - 1)); + + std::string s; + s.reserve(num_chars); + for (int i = 0; i < num_chars; ++i) { + s += chars[dist(gen)]; + } + return s; +} + +std::string ErrnoMessage(int errnum) { return std::strerror(errnum); } + +#if _WIN32 +std::string WinErrorMessage(int errnum) { + char buf[1024]; + auto nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errnum, 0, buf, sizeof(buf), NULL); + if (nchars == 0) { + // Fallback + std::stringstream ss; + ss << "Windows error #" << errnum; + return ss.str(); + } + return std::string(buf, nchars); +} +#endif + +// +// PlatformFilename implementation +// + +struct PlatformFilename::Impl { + Impl() = default; + explicit Impl(bfs::path p) : path(p.make_preferred()) {} + + bfs::path path; +}; + +PlatformFilename::PlatformFilename() : impl_(new Impl{}) {} + +PlatformFilename::~PlatformFilename() {} + +PlatformFilename::PlatformFilename(const Impl& impl) : impl_(new Impl(impl)) {} + +PlatformFilename::PlatformFilename(Impl&& impl) : impl_(new Impl(std::move(impl))) {} + +PlatformFilename::PlatformFilename(const PlatformFilename& other) + : PlatformFilename(Impl{other.impl_->path}) {} + +PlatformFilename::PlatformFilename(PlatformFilename&& other) + : impl_(std::move(other.impl_)) {} + +PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) { + this->impl_.reset(new Impl{other.impl_->path}); + return *this; +} + +PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) { + this->impl_ = std::move(other.impl_); + return *this; +} + +PlatformFilename::PlatformFilename(const NativePathString& path) + : PlatformFilename(Impl{path}) {} + +const NativePathString& PlatformFilename::ToNative() const { + return impl_->path.native(); +} + +std::string PlatformFilename::ToString() const { +#if _WIN32 + std::wstring ws = impl_->path.generic_wstring(); + std::string s; + Status st = ::arrow::util::WideStringToUTF8(ws, &s); + if (!st.ok()) { + std::stringstream ss; + ss << ""; + return ss.str(); + } + return s; +#else + return impl_->path.generic_string(); +#endif +} + +Status PlatformFilename::FromString(const std::string& file_name, PlatformFilename* out) { + if (file_name.find_first_of('\0') != std::string::npos) { + return Status::Invalid("Embedded NUL char in file name: '", file_name, "'"); + } + NativePathString ns; + RETURN_NOT_OK(StringToNative(file_name, &ns)); + *out = PlatformFilename(std::move(ns)); + return Status::OK(); +} + +Status PlatformFilename::Join(const std::string& child_name, + PlatformFilename* out) const { + NativePathString ns; + RETURN_NOT_OK(StringToNative(child_name, &ns)); + auto path = impl_->path / ns; + *out = 
PlatformFilename(Impl{std::move(path)}); + return Status::OK(); +} + +Status CreateDir(const PlatformFilename& dir_path, bool* created) { + bool res; + BOOST_FILESYSTEM_TRY + res = bfs::create_directory(dir_path.impl_->path); + BOOST_FILESYSTEM_CATCH + if (created) { + *created = res; + } + return Status::OK(); +} + +Status CreateDirTree(const PlatformFilename& dir_path, bool* created) { + bool res; + BOOST_FILESYSTEM_TRY + res = bfs::create_directories(dir_path.impl_->path); + BOOST_FILESYSTEM_CATCH + if (created) { + *created = res; + } + return Status::OK(); +} + +Status DeleteDirTree(const PlatformFilename& dir_path, bool* deleted) { + BOOST_FILESYSTEM_TRY + const auto& path = dir_path.impl_->path; + // XXX There is a race here. + auto st = bfs::symlink_status(path); + if (st.type() != bfs::file_not_found && st.type() != bfs::directory_file) { + return Status::IOError("Cannot delete non -directory '", path.string(), "'"); + } + auto n_removed = bfs::remove_all(path); + if (deleted) { + *deleted = n_removed != 0; + } + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +Status DeleteFile(const PlatformFilename& file_path, bool* deleted) { + BOOST_FILESYSTEM_TRY + bool res = false; + const auto& path = file_path.impl_->path; + // XXX There is a race here, and boost::filesystem doesn't allow deleting + // only files and not empty directories. + auto st = bfs::symlink_status(path); + if (!bfs::is_directory(st)) { + res = bfs::remove(path); + } else { + return Status::IOError("Cannot delete directory '", path.string(), "'"); + } + if (deleted) { + *deleted = res; + } + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +Status FileExists(const PlatformFilename& path, bool* out) { + BOOST_FILESYSTEM_TRY + *out = bfs::exists(path.impl_->path); + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +// +// File name handling +// + +Status FileNameFromString(const std::string& file_name, PlatformFilename* out) { + return PlatformFilename::FromString(file_name, out); +} + +// +// Functions for creating file descriptors +// + +#define CHECK_LSEEK(retval) \ + if ((retval) == -1) return Status::IOError("lseek failed"); + +static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) { +#if defined(_WIN32) + return _lseeki64(fd, pos, whence); +#else + return lseek(fd, pos, whence); +#endif +} + +static inline Status CheckFileOpResult(int ret, int errno_actual, + const PlatformFilename& file_name, + const char* opname) { + if (ret == -1) { +#ifdef _WIN32 + int winerr = GetLastError(); + if (winerr != ERROR_SUCCESS) { + return Status::IOError("Failed to ", opname, " file '", file_name.ToString(), + "', error: ", WinErrorMessage(winerr)); + } +#endif + return Status::IOError("Failed to ", opname, " file '", file_name.ToString(), + "', error: ", ErrnoMessage(errno_actual)); + } + return Status::OK(); +} + +Status FileOpenReadable(const PlatformFilename& file_name, int* fd) { + int ret, errno_actual; +#if defined(_WIN32) + SetLastError(0); + errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), + _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD); + ret = *fd; +#else + ret = *fd = open(file_name.ToNative().c_str(), O_RDONLY); + errno_actual = errno; + + if (ret >= 0) { + // open(O_RDONLY) succeeds on directories, check for it + struct stat st; + ret = fstat(*fd, &st); + if (ret == -1) { + ARROW_UNUSED(FileClose(*fd)); + // Will propagate error below + } else if (S_ISDIR(st.st_mode)) { + ARROW_UNUSED(FileClose(*fd)); + return Status::IOError("Cannot open for reading: path '", 
file_name.ToString(), + "' is a directory"); + } + } +#endif + + return CheckFileOpResult(ret, errno_actual, file_name, "open local"); +} + +Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool truncate, + bool append, int* fd) { + int ret, errno_actual; + +#if defined(_WIN32) + SetLastError(0); + int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT; + int pmode = _S_IREAD | _S_IWRITE; + + if (truncate) { + oflag |= _O_TRUNC; + } + if (append) { + oflag |= _O_APPEND; + } + + if (write_only) { + oflag |= _O_WRONLY; + } else { + oflag |= _O_RDWR; + } + + errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), oflag, _SH_DENYNO, pmode); + ret = *fd; + +#else + int oflag = O_CREAT; + + if (truncate) { + oflag |= O_TRUNC; + } + if (append) { + oflag |= O_APPEND; + } + + if (write_only) { + oflag |= O_WRONLY; + } else { + oflag |= O_RDWR; + } + + ret = *fd = open(file_name.ToNative().c_str(), oflag, ARROW_WRITE_SHMODE); + errno_actual = errno; +#endif + RETURN_NOT_OK(CheckFileOpResult(ret, errno_actual, file_name, "open local")); + if (append) { + // Seek to end, as O_APPEND does not necessarily do it + auto ret = lseek64_compat(*fd, 0, SEEK_END); + if (ret == -1) { + ARROW_UNUSED(FileClose(*fd)); + return Status::IOError("lseek failed"); + } + } + return Status::OK(); +} + +Status FileTell(int fd, int64_t* pos) { + int64_t current_pos; + +#if defined(_WIN32) + current_pos = _telli64(fd); + if (current_pos == -1) { + return Status::IOError("_telli64 failed"); + } +#else + current_pos = lseek64_compat(fd, 0, SEEK_CUR); + CHECK_LSEEK(current_pos); +#endif + + *pos = current_pos; + return Status::OK(); +} + +Status CreatePipe(int fd[2]) { + int ret; +#if defined(_WIN32) + ret = _pipe(fd, 4096, _O_BINARY); +#else + ret = pipe(fd); +#endif + + if (ret == -1) { + return Status::IOError("Error creating pipe: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +static Status StatusFromErrno(const char* prefix) { +#ifdef _WIN32 + errno = __map_mman_error(GetLastError(), EPERM); +#endif + return Status::IOError(prefix, ErrnoMessage(errno)); +} + +// +// Compatible way to remap a memory map +// + +Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, + void** new_addr) { + // should only be called with writable files + *new_addr = MAP_FAILED; +#ifdef _WIN32 + // flags are ignored on windows + HANDLE fm, h; + + if (!UnmapViewOfFile(addr)) { + return StatusFromErrno("UnmapViewOfFile failed: "); + } + + h = reinterpret_cast(_get_osfhandle(fildes)); + if (h == INVALID_HANDLE_VALUE) { + return StatusFromErrno("Cannot get file handle: "); + } + + uint64_t new_size64 = new_size; + LONG new_size_low = static_cast(new_size64 & 0xFFFFFFFFUL); + LONG new_size_high = static_cast((new_size64 >> 32) & 0xFFFFFFFFUL); + + SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN); + SetEndOfFile(h); + fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, ""); + if (fm == NULL) { + return StatusFromErrno("CreateFileMapping failed: "); + } + *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size); + CloseHandle(fm); + if (new_addr == NULL) { + return StatusFromErrno("MapViewOfFile failed: "); + } + return Status::OK(); +#else +#ifdef __APPLE__ + // we have to close the mmap first, truncate the file to the new size + // and recreate the mmap + if (munmap(addr, old_size) == -1) { + return StatusFromErrno("munmap failed: "); + } + if (ftruncate(fildes, new_size) == -1) { + return StatusFromErrno("ftruncate failed: "); + } + // we set READ / WRITE flags on the new 
map, since we could only have + // unlarged a RW map in the first place + *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mmap failed: "); + } + return Status::OK(); +#else + if (ftruncate(fildes, new_size) == -1) { + return StatusFromErrno("ftruncate failed: "); + } + *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mremap failed: "); + } + return Status::OK(); +#endif +#endif +} + +// +// Closing files +// + +Status FileClose(int fd) { + int ret; + +#if defined(_WIN32) + ret = static_cast(_close(fd)); +#else + ret = static_cast(close(fd)); +#endif + + if (ret == -1) { + return Status::IOError("error closing file"); + } + return Status::OK(); +} + +// +// Seeking and telling +// + +Status FileSeek(int fd, int64_t pos, int whence) { + int64_t ret = lseek64_compat(fd, pos, whence); + CHECK_LSEEK(ret); + return Status::OK(); +} + +Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); } + +Status FileGetSize(int fd, int64_t* size) { +#if defined(_WIN32) + struct __stat64 st; +#else + struct stat st; +#endif + st.st_size = -1; + +#if defined(_WIN32) + int ret = _fstat64(fd, &st); +#else + int ret = fstat(fd, &st); +#endif + + if (ret == -1) { + return Status::IOError("error stat()ing file"); + } + if (st.st_size == 0) { + // Maybe the file doesn't support getting its size, double-check by + // trying to tell() (seekable files usually have a size, while + // non-seekable files don't) + int64_t position; + RETURN_NOT_OK(FileTell(fd, &position)); + } else if (st.st_size < 0) { + return Status::IOError("error getting file size"); + } + *size = st.st_size; + return Status::OK(); +} + +// +// Reading data +// + +static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) { +#if defined(_WIN32) + HANDLE handle = reinterpret_cast(_get_osfhandle(fd)); + DWORD dwBytesRead = 0; + OVERLAPPED overlapped = {0}; + overlapped.Offset = static_cast(pos); + overlapped.OffsetHigh = static_cast(pos >> 32); + + // Note: ReadFile() will update the file position + BOOL bRet = + ReadFile(handle, buf, static_cast(nbytes), &dwBytesRead, &overlapped); + if (bRet || GetLastError() == ERROR_HANDLE_EOF) { + return dwBytesRead; + } else { + return -1; + } +#else + return static_cast( + pread(fd, buf, static_cast(nbytes), static_cast(pos))); +#endif +} + +Status FileRead(int fd, uint8_t* buffer, int64_t nbytes, int64_t* bytes_read) { + *bytes_read = 0; + + while (*bytes_read < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); +#if defined(_WIN32) + int64_t ret = + static_cast(_read(fd, buffer, static_cast(chunksize))); +#else + int64_t ret = static_cast(read(fd, buffer, static_cast(chunksize))); +#endif + + if (ret == -1) { + *bytes_read = ret; + break; + } + if (ret == 0) { + // EOF + break; + } + buffer += ret; + *bytes_read += ret; + } + + if (*bytes_read == -1) { + return Status::IOError("Error reading bytes from file: ", ErrnoMessage(errno)); + } + + return Status::OK(); +} + +Status FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes, + int64_t* bytes_read) { + *bytes_read = 0; + + while (*bytes_read < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); + int64_t ret = pread_compat(fd, buffer, chunksize, position); + + if (ret == -1) { + *bytes_read = ret; + break; + } + if (ret == 0) { + // EOF + 
break; + } + buffer += ret; + position += ret; + *bytes_read += ret; + } + + if (*bytes_read == -1) { + return Status::IOError("Error reading bytes from file: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +// +// Writing data +// + +Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) { + int ret = 0; + int64_t bytes_written = 0; + + while (ret != -1 && bytes_written < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written); +#if defined(_WIN32) + ret = static_cast( + _write(fd, buffer + bytes_written, static_cast(chunksize))); +#else + ret = static_cast( + write(fd, buffer + bytes_written, static_cast(chunksize))); +#endif + + if (ret != -1) { + bytes_written += ret; + } + } + + if (ret == -1) { + return Status::IOError("Error writing bytes to file: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +Status FileTruncate(int fd, const int64_t size) { + int ret, errno_actual; + +#ifdef _WIN32 + errno_actual = _chsize_s(fd, static_cast(size)); + ret = errno_actual == 0 ? 0 : -1; +#else + ret = ftruncate(fd, static_cast(size)); + errno_actual = errno; +#endif + + if (ret == -1) { + return Status::IOError("Error writing bytes to file: ", ErrnoMessage(errno_actual)); + } + return Status::OK(); +} + +// +// Environment variables +// + +Status GetEnvVar(const char* name, std::string* out) { +#ifdef _WIN32 + // On Windows, getenv() reads an early copy of the process' environment + // which doesn't get updated when SetEnvironmentVariable() is called. + constexpr int32_t bufsize = 2000; + char c_str[bufsize]; + auto res = GetEnvironmentVariableA(name, c_str, bufsize); + if (res >= bufsize) { + return Status::CapacityError("environment variable value too long"); + } else if (res == 0) { + return Status::KeyError("environment variable undefined"); + } + *out = std::string(c_str); + return Status::OK(); +#else + char* c_str = getenv(name); + if (c_str == nullptr) { + return Status::KeyError("environment variable undefined"); + } + *out = std::string(c_str); + return Status::OK(); +#endif +} + +Status GetEnvVar(const std::string& name, std::string* out) { + return GetEnvVar(name.c_str(), out); +} + +Status SetEnvVar(const char* name, const char* value) { +#ifdef _WIN32 + if (SetEnvironmentVariableA(name, value)) { + return Status::OK(); + } else { + return Status::Invalid("failed setting environment variable"); + } +#else + if (setenv(name, value, 1) == 0) { + return Status::OK(); + } else { + return Status::Invalid("failed setting environment variable"); + } +#endif +} + +Status SetEnvVar(const std::string& name, const std::string& value) { + return SetEnvVar(name.c_str(), value.c_str()); +} + +Status DelEnvVar(const char* name) { +#ifdef _WIN32 + if (SetEnvironmentVariableA(name, nullptr)) { + return Status::OK(); + } else { + return Status::Invalid("failed deleting environment variable"); + } +#else + if (unsetenv(name) == 0) { + return Status::OK(); + } else { + return Status::Invalid("failed deleting environment variable"); + } +#endif +} + +Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); } + +TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {} + +TemporaryDir::~TemporaryDir() { + Status st = DeleteDirTree(path_); + if (!st.ok()) { + ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st; + } +} + +Status TemporaryDir::Make(const std::string& prefix, std::unique_ptr* out) { + bfs::path path; + std::string suffix = MakeRandomName(8); + + 
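The `BOOST_FILESYSTEM_TRY` / `BOOST_FILESYSTEM_CATCH` pair defined earlier in this file simply brackets the enclosed statements in a try/catch that converts `bfs::filesystem_error` into an `arrow::Status`. Expanded, the section just below is equivalent to:

try {
  path = bfs::temp_directory_path() / (prefix + suffix);
  path += "/";
} catch (bfs::filesystem_error& _err) {
  return ToStatus(_err);  // IOError carrying the offending path(s)
}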
BOOST_FILESYSTEM_TRY + path = bfs::temp_directory_path() / (prefix + suffix); + path += "/"; + BOOST_FILESYSTEM_CATCH + + PlatformFilename fn(path.native()); + bool created = false; + RETURN_NOT_OK(CreateDir(fn, &created)); + if (!created) { + // XXX Should we retry? + return Status::IOError("Path already exists: '", fn.ToString(), "'"); + } + out->reset(new TemporaryDir(std::move(fn))); + return Status::OK(); +} + +SignalHandler::SignalHandler() : SignalHandler(static_cast(nullptr)) {} + +SignalHandler::SignalHandler(Callback cb) { +#if ARROW_HAVE_SIGACTION + sa_.sa_handler = cb; + sa_.sa_flags = 0; + sigemptyset(&sa_.sa_mask); +#else + cb_ = cb; +#endif +} + +#if ARROW_HAVE_SIGACTION +SignalHandler::SignalHandler(const struct sigaction& sa) { + memcpy(&sa_, &sa, sizeof(sa)); +} +#endif + +SignalHandler::Callback SignalHandler::callback() const { +#if ARROW_HAVE_SIGACTION + return sa_.sa_handler; +#else + return cb_; +#endif +} + +#if ARROW_HAVE_SIGACTION +const struct sigaction& SignalHandler::action() const { return sa_; } +#endif + +Status GetSignalHandler(int signum, SignalHandler* out) { +#if ARROW_HAVE_SIGACTION + struct sigaction sa; + int ret = sigaction(signum, nullptr, &sa); + if (ret != 0) { + // TODO more detailed message using errno + return Status::IOError("sigaction call failed"); + } + *out = SignalHandler(sa); +#else + // To read the old handler, set the signal handler to something else temporarily + SignalHandler::Callback cb = signal(signum, SIG_IGN); + if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) { + // TODO more detailed message using errno + return Status::IOError("signal call failed"); + } + *out = SignalHandler(cb); +#endif + return Status::OK(); +} + +Status SetSignalHandler(int signum, const SignalHandler& handler, + SignalHandler* old_handler) { +#if ARROW_HAVE_SIGACTION + struct sigaction old_sa; + int ret = sigaction(signum, &handler.action(), &old_sa); + if (ret != 0) { + // TODO more detailed message using errno + return Status::IOError("sigaction call failed"); + } + if (old_handler != nullptr) { + *old_handler = SignalHandler(old_sa); + } +#else + SignalHandler::Callback cb = signal(signum, handler.callback()); + if (cb == SIG_ERR) { + // TODO more detailed message using errno + return Status::IOError("signal call failed"); + } + if (old_handler != nullptr) { + *old_handler = SignalHandler(cb); + } +#endif + return Status::OK(); +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 779c6c62f0b..ab1b88b3f4c 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -25,6 +25,7 @@ #include "parquet/hasher.h" #include "parquet/platform.h" #include "parquet/types.h" +#include "parquet/parquet_types.h" namespace parquet { @@ -57,6 +58,12 @@ class PARQUET_EXPORT BloomFilter { /// Get the number of bytes of bitset virtual uint32_t GetBitsetSize() const = 0; + virtual format::BloomFilterHash GetHashStrategy() const = 0; + + virtual format::BloomFilterAlgorithm GetHashAlgorithm() const = 0; + + virtual format::BloomFilterCompression GetBFCompression() const = 0; + /// Compute hash for 32 bits value by using its plain encoding result. /// /// @param value the value to hash. 
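These three accessors expose the thrift enums a page writer needs when it serializes a filter's header next to the bitset. A sketch of that use, assuming thrift-generated setters named as in parquet.thrift; the exact `format::BloomFilterHeader` field names are an assumption, not taken from this patch:

// Hypothetical: populate a thrift header from a filter before writing it out.
void FillBloomFilterHeader(const parquet::BloomFilter& bf,
                           parquet::format::BloomFilterHeader* header) {
  header->__set_numBytes(bf.GetBitsetSize());
  header->__set_hash(bf.GetHashStrategy());
  header->__set_algorithm(bf.GetHashAlgorithm());
  header->__set_compression(bf.GetBFCompression());
}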
@@ -184,6 +191,12 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   void WriteTo(ArrowOutputStream* sink) const override;
 
   uint32_t GetBitsetSize() const override { return num_bytes_; }
 
+  format::BloomFilterHash GetHashStrategy() const override { return bfhash_; }
+
+  format::BloomFilterAlgorithm GetHashAlgorithm() const override { return bfalgorithm_; }
+
+  format::BloomFilterCompression GetBFCompression() const override { return bfcompression_; }
+
   uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); }
   uint64_t Hash(float value) const override { return hasher_->Hash(value); }
   uint64_t Hash(double value) const override { return hasher_->Hash(value); }
@@ -239,6 +252,12 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   // Algorithm used in this Bloom filter.
   Algorithm algorithm_;
 
+  format::BloomFilterCompression bfcompression_;
+
+  format::BloomFilterAlgorithm bfalgorithm_;
+
+  format::BloomFilterHash bfhash_;
+
   // The hash pointer points to actual hash class used.
   std::unique_ptr<Hasher> hasher_;
 };
diff --git a/cpp/src/parquet/bloom_filter_test.cc b/cpp/src/parquet/bloom_filter_test.cc
index 23aa4a58018..eb05d63485a 100644
--- a/cpp/src/parquet/bloom_filter_test.cc
+++ b/cpp/src/parquet/bloom_filter_test.cc
@@ -148,6 +148,65 @@ TEST(FPPTest, TestBloomFilter) {
   EXPECT_LT(exist, total_count * fpp);
 }
 
+
+TEST(HashTest, TestBloomFilter) {
+  // Records where FindHash locates the probe value among the members.
+  int exist = 0;
+
+  // Total count of elements that will be used
+#ifdef PARQUET_VALGRIND
+  const int total_count = 5000;
+#else
+  const int total_count = 10000000;
+#endif
+
+  // Bloom filter fpp parameter
+  const double fpp = 0.01;
+  std::vector<std::string> members;
+  BlockSplitBloomFilter bloom_filter;
+  bloom_filter.Init(BlockSplitBloomFilter::OptimalNumOfBits(total_count, fpp));
+  const int FIXED_LENGTH_data = 124;
+
+  for (int i = 0; i < total_count; i++) {
+    parquet::ByteArray value;
+    char hello[FIXED_LENGTH_data + 1];  // room for the terminating NUL
+    int64_t startnumber = i * FIXED_LENGTH_data;
+    for (int ci = 0; ci < FIXED_LENGTH_data; ci++) {
+      hello[FIXED_LENGTH_data - ci - 1] = (startnumber % 10) + '0';
+      startnumber /= 10;
+    }
+    hello[FIXED_LENGTH_data] = '\0';
+    std::string test(hello);
+    value.ptr = reinterpret_cast<const uint8_t*>(test.c_str());
+    value.len = test.size();
+    members.push_back(test);
+    bloom_filter.InsertHash(bloom_filter.Hash(&value));
+  }
+  const uint32_t FIXED_LENGTH = 124;
+  char dest[FIXED_LENGTH + 1];  // room for the terminating NUL
+  for (uint32_t i = 0; i < (FIXED_LENGTH - 7); i++) dest[i] = '0';
+  dest[FIXED_LENGTH - 7] = '4';
+  dest[FIXED_LENGTH - 6] = '2';
+  dest[FIXED_LENGTH - 5] = '3';
+  dest[FIXED_LENGTH - 4] = '3';
+  dest[FIXED_LENGTH - 3] = '5';
+  dest[FIXED_LENGTH - 2] = '5';
+  dest[FIXED_LENGTH - 1] = '2';
+  dest[FIXED_LENGTH] = '\0';
+  std::string test(dest);
+  ByteArray pba(test.size(), reinterpret_cast<const uint8_t*>(test.c_str()));
+
+  if (bloom_filter.FindHash(bloom_filter.Hash(&pba))) {
+    for (; exist < (int)members.size() && test.compare(members[exist]) != 0; exist++) {
+    }
+  }
+
+  // exist should probably stay small, given the default FPP of 0.01.
+  EXPECT_EQ(exist, 759949);
+}
+
 // The CompatibilityTest is used to test cross compatibility with parquet-mr, it reads
 // the Bloom filter binary generated by the Bloom filter class in the parquet-mr project
 // and tests whether the values inserted before could be filtered or not.
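For reference, the round trip this test exercises reduces to three calls: `Init` with a bit count from `OptimalNumOfBits`, `InsertHash` for every member, and `FindHash` to probe. A minimal membership probe, using only APIs that appear in the test above:

#include <string>

#include "parquet/bloom_filter.h"
#include "parquet/types.h"

// Returns true if 'key' is possibly in the filter, false if definitely not.
bool MightContain(parquet::BlockSplitBloomFilter& filter, const std::string& key) {
  parquet::ByteArray value(static_cast<uint32_t>(key.size()),
                           reinterpret_cast<const uint8_t*>(key.data()));
  return filter.FindHash(filter.Hash(&value));
}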
diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 60c44ffa6d2..55ec43097db 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -128,6 +128,12 @@ class PARQUET_EXPORT ColumnReader { virtual Type::type type() const = 0; virtual const ColumnDescriptor* descr() const = 0; + + // Skip reading levels + // Returns the number of levels skipped + virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + + virtual int64_t callReadBatch(int64_t batch_size,void* values,int64_t* values_read) = 0; }; // API to read values from a single column. This is a main client facing API. @@ -197,7 +203,18 @@ class TypedColumnReader : public ColumnReader { // Skip reading levels // Returns the number of levels skipped - virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + // virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + + int64_t callReadBatch(int64_t batch_size,void* value,int64_t* values_read){ + + + int16_t definition_level = 1; + int16_t repetition_level; + + int64_t r = ReadBatch(batch_size, &definition_level, &repetition_level,(T*)value,values_read); + + return r; + } }; namespace internal { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 4be8bb67361..60ad5f5288a 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include "arrow/array.h" #include "arrow/buffer_builder.h" @@ -194,6 +195,7 @@ class SerializedPageWriter : public PageWriter { } compressor_ = GetCodec(codec, compression_level); thrift_serializer_.reset(new ThriftSerializer); + current_page_row_set_index = 0; } int64_t WriteDictionaryPage(const DictionaryPage& page) override { @@ -371,6 +373,125 @@ class SerializedPageWriter : public PageWriter { page_header.__set_data_page_header_v2(data_page_header); } + int64_t WriteDataPagesWithIndex(const CompressedDataPage& page, format::PageLocation& ploc) override { + int64_t uncompressed_size = page.uncompressed_size(); + std::shared_ptr compressed_data = page.buffer(); + + format::DataPageHeader data_page_header; + data_page_header.__set_num_values(page.num_values()); + data_page_header.__set_encoding(ToThrift(page.encoding())); + data_page_header.__set_definition_level_encoding( + ToThrift(page.definition_level_encoding())); + data_page_header.__set_repetition_level_encoding( + ToThrift(page.repetition_level_encoding())); + data_page_header.__set_statistics(ToThrift(page.statistics())); + + format::PageHeader page_header; + page_header.__set_type(format::PageType::DATA_PAGE); + page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + page_header.__set_compressed_page_size(static_cast(compressed_data->size())); + page_header.__set_data_page_header(data_page_header); + // TODO(PARQUET-594) crc checksum + + int64_t start_pos = -1; + PARQUET_THROW_NOT_OK(sink_->Tell(&start_pos)); + if (data_page_offset_ == 0) { + data_page_offset_ = start_pos; + } + + int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_.get()); + PARQUET_THROW_NOT_OK(sink_->Write(compressed_data->data(), compressed_data->size())); + + total_uncompressed_size_ += uncompressed_size + header_size; + total_compressed_size_ += compressed_data->size() + header_size; + num_values_ += page.num_values(); + + int64_t current_pos = -1; + PARQUET_THROW_NOT_OK(sink_->Tell(¤t_pos)); + + ploc.offset = start_pos; + ploc.first_row_index = current_page_row_set_index; + ploc.compressed_page_size = 
page_header.compressed_page_size + (current_pos - start_pos); + current_page_row_set_index += page_header.data_page_header.num_values; + + return current_pos - start_pos; +} + + + /* sample Adding ColumnIndex from chunk to offset. + * Status HdfsParquetTableWriter::WritePageIndex() { + if (!state_->query_options().parquet_write_page_index) return Status::OK(); + + // Currently Impala only write Parquet files with a single row group. The current + // page index logic depends on this behavior as it only keeps one row group's + // statistics in memory. + DCHECK_EQ(file_metadata_.row_groups.size(), 1); + + parquet::RowGroup* row_group = &(file_metadata_.row_groups[0]); + // Write out the column indexes. + for (int i = 0; i < columns_.size(); ++i) { + auto& column = *columns_[i]; // column-writer + if (!column.valid_column_index_) continue; + column.column_index_.__set_boundary_order( + column.row_group_stats_base_->GetBoundaryOrder()); + // We always set null_counts. + column.column_index_.__isset.null_counts = true; + uint8_t* buffer = nullptr; + uint32_t len = 0; + RETURN_IF_ERROR(thrift_serializer_->SerializeToBuffer( + &column.column_index_, &len, &buffer)); + RETURN_IF_ERROR(Write(buffer, len)); + // Update the column_index_offset and column_index_length of the ColumnChunk + row_group->columns[i].__set_column_index_offset(file_pos_); + row_group->columns[i].__set_column_index_length(len); + file_pos_ += len; + } + // Write out the offset indexes. + for (int i = 0; i < columns_.size(); ++i) { + auto& column = *columns_[i]; // column-writer + uint8_t* buffer = nullptr; + uint32_t len = 0; + RETURN_IF_ERROR(thrift_serializer_->SerializeToBuffer( + &column.offset_index_, &len, &buffer)); + RETURN_IF_ERROR(Write(buffer, len)); + // Update the offset_index_offset and offset_index_length of the ColumnChunk + row_group->columns[i].__set_offset_index_offset(file_pos_); + row_group->columns[i].__set_offset_index_length(len); + file_pos_ += len; + } + return Status::OK(); +} + * + */ + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) { + // index_page_offset = -1 since they are not supported + + uint32_t ci_len, oi_len; + uint8_t* buffer; + if (file_pos_ == 0) sink_->Tell(&file_pos_); + thrift_serializer_->SerializeToBuffer(&ci,&ci_len,&buffer); + sink_->Write(buffer,ci_len); + thrift_serializer_->SerializeToBuffer(&oi,&oi_len,&buffer); + sink_->Write(buffer,oi_len); + + if (oi_offset == 0 && ci_offset == 0) { + oi_offset = ci_len; + } + + metadata_->WriteIndex(file_pos_, ci_offset, oi_offset, ci_len, oi_len); + + ci_offset += ci_len; + oi_offset += oi_len; + // Write metadata at end of column chunk + metadata_->WriteTo(sink_.get()); + } + + void WritePageBloomFilter( BlockSplitBloomFilter& bl_page_filter, int64_t& file_pos) { + sink_->Tell(&file_pos); + bl_page_filter.WriteTo(sink_.get()); + } + bool has_compressor() override { return (compressor_ != nullptr); } int64_t num_values() { return num_values_; } @@ -453,16 +574,45 @@ class SerializedPageWriter : public PageWriter { // Compression codec to use. std::unique_ptr<::arrow::util::Codec> compressor_; +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD std::string data_page_aad_; std::string data_page_header_aad_; std::shared_ptr meta_encryptor_; std::shared_ptr data_encryptor_; +======= + BlockSplitBloomFilter blf; +>>>>>>> 522c3f988... 
insert hash std::shared_ptr encryption_buffer_; std::map dict_encoding_stats_; std::map data_encoding_stats_; +======= + +<<<<<<< HEAD + // OffsetIndex stores the locations of the pages. + parquet::format::OffsetIndex offset_index_; + + // ColumnIndex stores the statistics of the pages. + parquet::format::ColumnIndex column_index_; + + // Memory consumption of the min/max values in the page index. + int64_t page_index_memory_consumption_ = 0; + + + /// In parquet::ColumnIndex we store the min and max values for each page. + /// However, we don't want to store very long strings, so we truncate them. + /// The value of it must not be too small, since we don't want to truncate + /// non-string values. + static const int PAGE_INDEX_MAX_STRING_LENGTH = 64; +>>>>>>> 34b755b70... column index +======= +>>>>>>> c0fbc925b... write index +======= +>>>>>>> 6225e423c... page level bloom filter }; // This implementation of the PageWriter writes to the final sink on Close . @@ -514,6 +664,18 @@ class BufferedPageWriter : public PageWriter { return pager_->WriteDataPage(page); } + int64_t WriteDataPagesWithIndex(const parquet::CompressedDataPage &page, format::PageLocation& ploc) override { + return pager_->WriteDataPagesWithIndex(page, ploc); + } + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) { + pager_->WriteIndex(file_pos_, ci_offset, oi_offset, ci, oi); + } + + void WritePageBloomFilter(BlockSplitBloomFilter& blf, int64_t& f_pos) { + //pager_->WritePageBloomFilter(blf, f_pos); + } + void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override { pager_->Compress(src_buffer, dest_buffer); } @@ -595,6 +757,19 @@ class ColumnWriterImpl { int64_t Close(); + int64_t CloseWithIndex(); + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset); + + void WriteBloomFilterOffset(int64_t& file_pos); + + void WritePageBloomFilter(BlockSplitBloomFilter& bl_page_filter) { + int64_t f_pos; + //pager_->WritePageBloomFilter(bl_page_filter, f_pos); + //AddBloomFilterOffsetToOffsetIndex(f_pos); + + } + protected: virtual std::shared_ptr GetValuesBuffer() = 0; @@ -614,18 +789,32 @@ class ColumnWriterImpl { // Serializes the Data Pages in other encoding modes void AddDataPage(); +<<<<<<< HEAD +<<<<<<< HEAD void BuildDataPageV1(int64_t definition_levels_rle_size, int64_t repetition_levels_rle_size, int64_t uncompressed_size, const std::shared_ptr& values); void BuildDataPageV2(int64_t definition_levels_rle_size, int64_t repetition_levels_rle_size, int64_t uncompressed_size, const std::shared_ptr& values); +======= + // Adds Data Pages to an in memory buffer in dictionary encoding mode + // Serializes the Data Pages in other encoding modes +======= + +>>>>>>> 04c793ab2... write index + void AddDataPageWithIndex(); +>>>>>>> 009899ff1... 
write data page

   // Serializes Data Pages
   void WriteDataPage(const DataPage& page) {
     total_bytes_written_ += pager_->WriteDataPage(page);
   }

+  void WriteDataPageWithIndex(const CompressedDataPage& page, format::PageLocation& ploc) {
+    total_bytes_written_ += pager_->WriteDataPagesWithIndex(page, ploc);
+  }
+
   // Write multiple definition levels
   void WriteDefinitionLevels(int64_t num_levels, const int16_t* levels) {
     DCHECK(!closed_);
@@ -647,6 +836,8 @@ class ColumnWriterImpl {
   // Serialize the buffered Data Pages
   void FlushBufferedDataPages();

+  void FlushBufferedDataPagesWithIndex();
+
   ColumnChunkMetaDataBuilder* metadata_;
   const ColumnDescriptor* descr_;
   // scratch buffer if validity bits need to be recalculated.
@@ -655,6 +846,8 @@ class ColumnWriterImpl {

   std::unique_ptr<PageWriter> pager_;

+  std::unique_ptr<ThriftSerializer> thrift_serializer_;
+
   bool has_dictionary_;
   Encoding::type encoding_;
   const WriterProperties* properties_;
@@ -701,12 +894,100 @@ class ColumnWriterImpl {

   std::vector<CompressedDataPage> data_pages_;

+  /// In parquet::ColumnIndex we store the min and max values for each page.
+  /// However, we don't want to store very long strings, so we truncate them.
+  /// The limit must not be too small, since we don't want to truncate
+  /// non-string values.
+  static const int PAGE_INDEX_MAX_STRING_LENGTH = 128;
+
+  ::arrow::Status AddMemoryConsumptionForPageIndex(int64_t new_memory_allocation) {
+    page_index_memory_consumption_ += new_memory_allocation;
+    return ::arrow::Status::OK();
+  }
+
+  ::arrow::Status ReserveOffsetIndex(int64_t capacity) {
+    PARQUET_THROW_NOT_OK(AddMemoryConsumptionForPageIndex(
+        capacity * sizeof(parquet::format::PageLocation) + sizeof(int64_t)));
+    offset_index_.page_locations.reserve(capacity);
+    offset_index_.page_bloom_filter_offsets.reserve(capacity);
+    return ::arrow::Status::OK();
+  }
+
+  void AddLocationToOffsetIndex(const parquet::format::PageLocation location) {
+    offset_index_.page_locations.push_back(location);
+  }
+
+  void AddBloomFilterOffsetToOffsetIndex(const int64_t page_blf_offset) {
+    offset_index_.page_bloom_filter_offsets.push_back(page_blf_offset);
+  }
+
+  ::arrow::Status TruncateDown(const std::string& min, int32_t max_length,
+                               std::string* result) {
+    *result = min.substr(0, std::min(static_cast<int32_t>(min.length()), max_length));
+    return Status::OK();
+  }
+
+  ::arrow::Status TruncateUp(const std::string& max, int32_t max_length,
+                             std::string* result) {
+    if (max.length() <= static_cast<uint32_t>(max_length)) {
+      // Short enough already; no truncation needed.
+      *result = max;
+      return Status::OK();
+    }
+
+    *result = max.substr(0, max_length);
+    int i = max_length - 1;
+    // Roll trailing 0xFF bytes over to 0x00, moving left until a byte can be
+    // incremented without overflowing.
+    while (i > 0 && static_cast<char>((*result)[i]) == -1) {
+      (*result)[i] += 1;
+      --i;
+    }
+    // We convert it to unsigned because signed overflow results in undefined behavior.
+    unsigned char uch = static_cast<unsigned char>((*result)[i]);
+    uch += 1;
+    (*result)[i] = uch;
+    if (i == 0 && (*result)[i] == 0) {
+      return Status(::arrow::StatusCode::CapacityError,
+                    "TruncateUp() couldn't increase string.");
+    }
+    result->resize(i + 1);
+    return Status::OK();
+  }
+
+  ::arrow::Status AddPageStatsToColumnIndex(const parquet::EncodedStatistics& page_stats) {
+    // If page_stats contains min_value and max_value, then append them to min_values_
+    // and max_values_ and also mark the page as not null. In case min and max values
+    // are not set, push empty strings to maintain the consistency of the index and
+    // mark the page as null. Always push the null_count.
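+    //
+    // Illustrative example (three pages; truncation elided) -- assumed input
+    // rather than part of this patch:
+    //   page 0: min "aaa", max "abz"  -> null_pages += false, min/max appended
+    //   page 1: all values null       -> null_pages += true,  ""/"" appended
+    //   page 2: min "b",   max "bz"   -> null_pages += false, min/max appended
+    // null_counts receives one entry per page in every case.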
+    std::string min_val;
+    std::string max_val;
+
+    if (page_stats.is_set()) {
+      Status s_min = TruncateDown(page_stats.min(), PAGE_INDEX_MAX_STRING_LENGTH, &min_val);
+      if (!s_min.ok()) {
+        return s_min;
+      }
+      Status s_max = TruncateDown(page_stats.max(), PAGE_INDEX_MAX_STRING_LENGTH, &max_val);
+      if (!s_max.ok()) {
+        return s_max;
+      }
+      column_index_.null_pages.push_back(false);
+    } else {
+      column_index_.null_pages.push_back(true);
+    }
+    PARQUET_THROW_NOT_OK(
+        AddMemoryConsumptionForPageIndex(min_val.capacity() + max_val.capacity()));
+    column_index_.min_values.emplace_back(std::move(min_val));
+    column_index_.max_values.emplace_back(std::move(max_val));
+    column_index_.null_counts.push_back(page_stats.null_count);
+    return Status::OK();
+  }
+
  private:
   void InitSinks() {
     definition_levels_sink_.Rewind(0);
     repetition_levels_sink_.Rewind(0);
   }

   // Concatenate the encoded levels and values into one buffer
   void ConcatenateBuffers(int64_t definition_levels_rle_size,
                           int64_t repetition_levels_rle_size,
@@ -717,6 +998,19 @@ class ColumnWriterImpl {
     combined += definition_levels_rle_size;
     memcpy(combined, values->data(), values->size());
   }

+  // OffsetIndex stores the locations of the pages.
+  parquet::format::OffsetIndex offset_index_;
+
+  // ColumnIndex stores the statistics of the pages.
+  parquet::format::ColumnIndex column_index_;
+
+  // Memory consumption of the min/max values in the page index.
+  int64_t page_index_memory_consumption_ = 0;
+
+  // Scratch location reused for each page that is written with an index.
+  parquet::format::PageLocation ploc;
+
 };

 // return the size of the encoded buffer
@@ -877,6 +1171,78 @@ void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size,
   }
 }

+void ColumnWriterImpl::AddDataPageWithIndex() {
+  int64_t definition_levels_rle_size = 0;
+  int64_t repetition_levels_rle_size = 0;
+
+  std::shared_ptr<Buffer> values = GetValuesBuffer();
+
+  if (descr_->max_definition_level() > 0) {
+    definition_levels_rle_size =
+        RleEncodeLevels(definition_levels_sink_.data(), definition_levels_rle_.get(),
+                        descr_->max_definition_level());
+  }
+
+  if (descr_->max_repetition_level() > 0) {
+    repetition_levels_rle_size =
+        RleEncodeLevels(repetition_levels_sink_.data(), repetition_levels_rle_.get(),
+                        descr_->max_repetition_level());
+  }
+
+  int64_t uncompressed_size =
+      definition_levels_rle_size + repetition_levels_rle_size + values->size();
+
+  // Resize with arrow::Buffer shrink_to_fit = false: the underlying buffer
+  // only keeps growing, so resizing to a smaller size does not reallocate.
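+  //
+  // After the Resize below, the page body is assembled back to back as
+  //
+  //   [ repetition levels (RLE) | definition levels (RLE) | encoded values ]
+  //
+  // which is the layout a v1 data page reader expects.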
+  PARQUET_THROW_NOT_OK(uncompressed_data_->Resize(uncompressed_size, false));
+
+  // Concatenate data into a single buffer
+  uint8_t* uncompressed_ptr = uncompressed_data_->mutable_data();
+  memcpy(uncompressed_ptr, repetition_levels_rle_->data(), repetition_levels_rle_size);
+  uncompressed_ptr += repetition_levels_rle_size;
+  memcpy(uncompressed_ptr, definition_levels_rle_->data(), definition_levels_rle_size);
+  uncompressed_ptr += definition_levels_rle_size;
+  memcpy(uncompressed_ptr, values->data(), values->size());
+
+  EncodedStatistics page_stats = GetPageStatistics();
+  page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
+  page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
+  AddPageStatsToColumnIndex(page_stats);
+  ResetPageStatistics();
+
+  std::shared_ptr<Buffer> compressed_data;
+  if (pager_->has_compressor()) {
+    pager_->Compress(*(uncompressed_data_.get()), compressed_data_.get());
+    compressed_data = compressed_data_;
+  } else {
+    compressed_data = uncompressed_data_;
+  }
+
+  // Write the page to the OutputStream eagerly if there is no dictionary or
+  // if dictionary encoding has fallen back to PLAIN
+  if (has_dictionary_ && !fallback_) {  // Save pages until end of dictionary encoding
+    std::shared_ptr<Buffer> compressed_data_copy;
+    PARQUET_THROW_NOT_OK(compressed_data->Copy(0, compressed_data->size(), allocator_,
+                                               &compressed_data_copy));
+    CompressedDataPage page(compressed_data_copy,
+                            static_cast<int32_t>(num_buffered_values_), encoding_,
+                            Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
+    total_compressed_bytes_ += page.size() + sizeof(format::PageHeader);
+    data_pages_.push_back(std::move(page));
+  } else {  // Eagerly write pages
+    CompressedDataPage page(compressed_data, static_cast<int32_t>(num_buffered_values_),
+                            encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size,
+                            page_stats);
+    WriteDataPageWithIndex(page, ploc);
+    AddLocationToOffsetIndex(ploc);
+  }
+
+  // Re-initialize the sinks for the next page.
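+  // (Pages saved on the dictionary branch above are flushed later by
+  // FlushBufferedDataPagesWithIndex(), which writes each one and records its
+  // PageLocation in the offset index.)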
+ InitSinks(); + num_buffered_values_ = 0; + num_buffered_encoded_values_ = 0; +} + int64_t ColumnWriterImpl::Close() { if (!closed_) { closed_ = true; @@ -901,6 +1267,38 @@ int64_t ColumnWriterImpl::Close() { return total_bytes_written_; } +int64_t ColumnWriterImpl::CloseWithIndex() { + if (!closed_) { + closed_ = true; + if (has_dictionary_ && !fallback_) { + WriteDictionaryPage(); + } + + FlushBufferedDataPagesWithIndex(); + + EncodedStatistics chunk_statistics = GetChunkStatistics(); + chunk_statistics.ApplyStatSizeLimits( + properties_->max_statistics_size(descr_->path())); + chunk_statistics.set_is_signed(SortOrder::SIGNED == descr_->sort_order()); + + if (rows_written_ > 0 && chunk_statistics.is_set()) { + metadata_->SetStatistics(chunk_statistics); + } + + pager_->Close(has_dictionary_, fallback_); + } + + return total_bytes_written_; +} + +void ColumnWriterImpl::WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset) { + pager_->WriteIndex(file_pos_, ci_offset, oi_offset, column_index_, offset_index_); +} + +void ColumnWriterImpl::WriteBloomFilterOffset(int64_t& file_pos_) { + metadata_->WriteBloomFilterOffset(file_pos_); +} + void ColumnWriterImpl::FlushBufferedDataPages() { // Write all outstanding data to a new page if (num_buffered_values_ > 0) { @@ -913,6 +1311,24 @@ void ColumnWriterImpl::FlushBufferedDataPages() { total_compressed_bytes_ = 0; } +void ColumnWriterImpl::FlushBufferedDataPagesWithIndex() { + + if (num_buffered_values_ > 0) { + AddDataPageWithIndex(); + } + + PARQUET_THROW_NOT_OK(ReserveOffsetIndex(data_pages_.size())); + + for (size_t i = 0; i < data_pages_.size(); i++) { + // AddPageStatsToColumnIndex(data_pages_[i].statistics()); + WriteDataPageWithIndex(data_pages_[i],ploc); + AddLocationToOffsetIndex(ploc); + } + + data_pages_.clear(); + total_compressed_bytes_ = 0; +} + // ---------------------------------------------------------------------- // TypedColumnWriter @@ -982,6 +1398,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< int64_t Close() override { return ColumnWriterImpl::Close(); } +<<<<<<< Updated upstream +======= +<<<<<<< HEAD +>>>>>>> Stashed changes int64_t WriteBatch(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const T* values) override { // We check for DataPage limits only after we have inserted the values. If a user @@ -1009,8 +1429,28 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< }; DoInBatches(num_values, properties_->write_batch_size(), WriteChunk); return value_offset; +<<<<<<< Updated upstream +======= +======= + int64_t CloseWithIndex() override { + int64_t total_bytes_written = ColumnWriterImpl::CloseWithIndex(); + //WritePageBloomFilter(); + return total_bytes_written; + } + + void WriteIndex(int64_t file_pos_, int64_t ci_offset, int64_t oi_offset) override { + return ColumnWriterImpl::WriteIndex(file_pos_, ci_offset, oi_offset); +>>>>>>> c0fbc925b... 
write index +>>>>>>> Stashed changes } + void WriteBloomFilterOffset(int64_t& file_pos) override { + ColumnWriterImpl::WriteBloomFilterOffset(file_pos); + } + + void WriteBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index) override; + void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) override { @@ -1106,6 +1546,19 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< total_bytes_written_ += pager_->WriteDictionaryPage(page); } +<<<<<<< HEAD +======= + // Checks if the Dictionary Page size limit is reached + // If the limit is reached, the Dictionary and Data Pages are serialized + // The encoding is switched to PLAIN + void CheckDictionarySizeLimit(bool with_index); + +<<<<<<< HEAD + void CheckDictionarySizeLimitWithIndex(); + +>>>>>>> e07017907... writer;generic reader +======= +>>>>>>> c0fbc925b... write index EncodedStatistics GetPageStatistics() override { EncodedStatistics result; if (page_statistics_) result = page_statistics_->Encode(); @@ -1144,6 +1597,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< std::shared_ptr page_statistics_; std::shared_ptr chunk_statistics_; +<<<<<<< HEAD +<<<<<<< HEAD // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the // dictionary passed to DictEncoder::PutDictionary so we can check // subsequent array chunks to see either if materialization is required (in @@ -1160,6 +1615,97 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< ++values_to_write; } } +======= +======= + std::vector blf; + int num_rows_for_bf; + const double fpp = 0.001; + +>>>>>>> 6225e423c... 
page level bloom filter + inline int64_t WriteMiniBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index); + + inline int64_t WriteMiniBatchSpaced(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, + const uint8_t* valid_bits, + int64_t valid_bits_offset, const T* values, + int64_t* num_spaced_written); + + // Write values to a temporary buffer before they are encoded into pages + void WriteValues(int64_t num_values, const T* values) { + dynamic_cast(current_encoder_.get()) + ->Put(values, static_cast(num_values)); + } + + void WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, + int64_t valid_bits_offset, const T* values) { + dynamic_cast(current_encoder_.get()) + ->PutSpaced(values, static_cast(num_values), valid_bits, valid_bits_offset); + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const int32_t* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const int64_t* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const float* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash((float)(int64_t)values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const double* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash((double)(int64_t)values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const ByteArray* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(&values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const Int96* values) { + + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const FLBA* values) { + + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const bool* values) { + + } + + void WritePageBloomFilter() { + for (uint32_t i=0; i < blf.size(); i++) + ColumnWriterImpl::WritePageBloomFilter(blf[i]); + } + + void InitializeBloomF() { + if (blf.size() == 0) { + BlockSplitBloomFilter bf; + bf.Init(BlockSplitBloomFilter::OptimalNumOfBits(properties_->write_batch_size() << 8,fpp)); + blf.push_back(std::move(bf)); + } + } + + void NewPageBloomFilter() { + BlockSplitBloomFilter bf; + bf.Init(BlockSplitBloomFilter::OptimalNumOfBits(properties_->write_batch_size() << 8,fpp)); + blf.push_back(std::move(bf)); + } + +}; +>>>>>>> e07017907... writer;generic reader WriteDefinitionLevels(num_values, def_levels); } else { @@ -1167,6 +1713,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< values_to_write = num_values; } +<<<<<<< HEAD +<<<<<<< HEAD // Not present for non-repeated fields if (descr_->max_repetition_level() > 0) { // A row could include more than one value @@ -1176,7 +1724,47 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< rows_written_++; } } +======= +template +<<<<<<< HEAD +void TypedColumnWriterImpl::CheckDictionarySizeLimitWithIndex() { +======= +void TypedColumnWriterImpl::CheckDictionarySizeLimit(bool with_index) { +>>>>>>> d78a37051... 
write index + // We have to dynamic cast here because TypedEncoder as some compilers + // don't want to cast through virtual inheritance + auto dict_encoder = dynamic_cast*>(current_encoder_.get()); + //if (dict_encoder->dict_encoded_size() >= properties_->dictionary_pagesize_limit()) { + WriteDictionaryPage(); + // Serialize the buffered Dictionary Indicies +<<<<<<< HEAD + FlushBufferedDataPagesWithIndex(); +======= + if (!with_index) + FlushBufferedDataPages(); + else{ + NewPageBloomFilter(); + FlushBufferedDataPagesWithIndex(); +<<<<<<< HEAD +>>>>>>> d78a37051... write index +======= + } +>>>>>>> b4a66445c... page write bloom + fallback_ = true; + // Only PLAIN encoding is supported for fallback in V1 + current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_, + properties_->memory_pool()); + encoding_ = Encoding::PLAIN; + //} +} + +======= +>>>>>>> c0fbc925b... write index +// ---------------------------------------------------------------------- +// Instantiate templated classes +>>>>>>> e07017907... writer;generic reader +<<<<<<< HEAD WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row @@ -1201,6 +1789,23 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< def_levels[x] >= level_info_.repeated_ancestor_def_level ? 1 : 0; } *null_count = *out_values_to_write - *out_spaced_values_to_write; +<<<<<<< Updated upstream +======= +======= +template +int64_t TypedColumnWriterImpl::WriteMiniBatch(int64_t num_values, + const int16_t* def_levels, + const int16_t* rep_levels, + const T* values, + bool with_index) { + int64_t values_to_write = 0; + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0) { + for (int64_t i = 0; i < num_values; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + ++values_to_write; +>>>>>>> fa45c95a5... write index +>>>>>>> Stashed changes } return; } @@ -1254,17 +1859,58 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Each value is exactly one row rows_written_ += static_cast(num_levels); } +<<<<<<< Updated upstream +======= +<<<<<<< HEAD +======= + + WriteRepetitionLevels(num_values, rep_levels); + } else { + // Each value is exactly one row + rows_written_ += static_cast(num_values); + } + + // PARQUET-780 + if (values_to_write > 0) { + DCHECK(nullptr != values) << "Values ptr cannot be NULL"; + } + + WriteValues(values_to_write, values); + //AppendValuesToPageBloomFilter(values_to_write,values); + + if (page_statistics_ != nullptr) { + page_statistics_->Update(values, values_to_write, num_values - values_to_write); +>>>>>>> 6225e423c... page level bloom filter +>>>>>>> Stashed changes } void CommitWriteAndCheckPageLimit(int64_t num_levels, int64_t num_values) { num_buffered_values_ += num_levels; num_buffered_encoded_values_ += num_values; +<<<<<<< HEAD if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { AddDataPage(); } +======= + if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { + //NewPageBloomFilter(); + + if (!with_index) + AddDataPage(); + else + AddDataPageWithIndex(); + } + if (has_dictionary_ && !fallback_) { +<<<<<<< HEAD + CheckDictionarySizeLimit(); +>>>>>>> fa45c95a5... write index +======= + CheckDictionarySizeLimit(with_index); +>>>>>>> d78a37051... 
write index } +<<<<<<< HEAD void FallbackToPlainEncoding() { if (IsDictionaryEncoding(current_encoder_->encoding())) { WriteDictionaryPage(); @@ -1275,6 +1921,32 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_, properties_->memory_pool()); encoding_ = Encoding::PLAIN; +======= + return values_to_write; +} + +template +int64_t TypedColumnWriterImpl::WriteMiniBatchSpaced( + int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, + const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values, + int64_t* num_spaced_written) { + int64_t values_to_write = 0; + int64_t spaced_values_to_write = 0; + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0) { + // Minimal definition level for which spaced values are written + int16_t min_spaced_def_level = descr_->max_definition_level(); + if (descr_->schema_node()->is_optional()) { + min_spaced_def_level--; + } + for (int64_t i = 0; i < num_levels; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + ++values_to_write; + } + if (def_levels[i] >= min_spaced_def_level) { + ++spaced_values_to_write; + } +>>>>>>> e07017907... writer;generic reader } } @@ -1352,6 +2024,7 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( maybe_parent_nulls); }; +<<<<<<< HEAD if (!IsDictionaryEncoding(current_encoder_->encoding()) || !DictionaryDirectWriteSupported(array)) { // No longer dictionary-encoding for whatever reason, maybe we never were @@ -1361,6 +2034,13 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( // dictionary page limit is reached, at which everything (dictionary and // dense) will fall back to plain encoding return WriteDense(); +======= + if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { + AddDataPage(); + } + if (has_dictionary_ && !fallback_) { + CheckDictionarySizeLimit(false); +>>>>>>> d78a37051... write index } auto dict_encoder = dynamic_cast*>(current_encoder_.get()); @@ -1368,7 +2048,28 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( std::shared_ptr<::arrow::Array> dictionary = data.dictionary(); std::shared_ptr<::arrow::Array> indices = data.indices(); +<<<<<<< HEAD +======= +template +void TypedColumnWriterImpl::WriteBatch(int64_t num_values, + const int16_t* def_levels, + const int16_t* rep_levels, + const T* values, + bool with_index) { + // We check for DataPage limits only after we have inserted the values. If a user + // writes a large number of values, the DataPage size can be much above the limit. + // The purpose of this chunking is to bound this. Even if a user writes large number + // of values, the chunking will ensure the AddDataPage() is called at a reasonable + // pagesize limit + int64_t write_batch_size = properties_->write_batch_size(); + + //InitializeBloomF(); + + int num_batches = static_cast(num_values / write_batch_size); + int64_t num_remaining = num_values % write_batch_size; +>>>>>>> e07017907... 
writer;generic reader int64_t value_offset = 0; +<<<<<<< HEAD auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) { int64_t batch_num_values = 0; int64_t batch_num_spaced_values = 0; @@ -1387,7 +2088,21 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( CommitWriteAndCheckPageLimit(batch_size, batch_num_values); value_offset += batch_num_spaced_values; }; +======= + for (int round = 0; round < num_batches; round++) { + int64_t offset = round * write_batch_size; + int64_t num_values = WriteMiniBatch(write_batch_size, &def_levels[offset], + &rep_levels[offset], &values[value_offset], with_index); + value_offset += num_values; + } + // Write the remaining values + int64_t offset = num_batches * write_batch_size; + WriteMiniBatch(num_remaining, &def_levels[offset], &rep_levels[offset], + &values[value_offset], with_index); +} +>>>>>>> fa45c95a5... write index +<<<<<<< HEAD // Handle seeing dictionary for the first time if (!preserved_dictionary_) { // It's a new dictionary. Call PutDictionary and keep track of it @@ -1403,6 +2118,28 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( // Dictionary has changed PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding()); return WriteDense(); +======= +template +void TypedColumnWriterImpl::WriteBatchSpaced( + int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, + const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) { + // We check for DataPage limits only after we have inserted the values. If a user + // writes a large number of values, the DataPage size can be much above the limit. + // The purpose of this chunking is to bound this. Even if a user writes large number + // of values, the chunking will ensure the AddDataPage() is called at a reasonable + // pagesize limit + int64_t write_batch_size = properties_->write_batch_size(); + int num_batches = static_cast(num_values / write_batch_size); + int64_t num_remaining = num_values % write_batch_size; + int64_t num_spaced_written = 0; + int64_t values_offset = 0; + for (int round = 0; round < num_batches; round++) { + int64_t offset = round * write_batch_size; + WriteMiniBatchSpaced(write_batch_size, &def_levels[offset], &rep_levels[offset], + valid_bits, valid_bits_offset + values_offset, + values + values_offset, &num_spaced_written); + values_offset += num_spaced_written; +>>>>>>> 522c3f988... insert hash } PARQUET_CATCH_NOT_OK( @@ -1519,8 +2256,16 @@ Status TypedColumnWriterImpl::WriteArrowDense( if (array.type_id() != ::arrow::Type::BOOL) { ARROW_UNSUPPORTED(); } +<<<<<<< HEAD return WriteArrowSerialize( array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls); +<<<<<<< Updated upstream +======= +======= + PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, buffer,false)); + return Status::OK(); +>>>>>>> fa45c95a5... 
write index +>>>>>>> Stashed changes } // ---------------------------------------------------------------------- @@ -1559,7 +2304,7 @@ Status TypedColumnWriterImpl::WriteArrowDense( const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) { switch (array.type()->id()) { case ::arrow::Type::NA: { - PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr)); + PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr,false)); } break; WRITE_SERIALIZE_CASE(INT8, Int8Type, Int32Type) WRITE_SERIALIZE_CASE(UINT8, UInt8Type, Int32Type) diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h index 57f98533a72..411215aeda5 100644 --- a/cpp/src/parquet/column_writer.h +++ b/cpp/src/parquet/column_writer.h @@ -24,6 +24,8 @@ #include "parquet/exception.h" #include "parquet/platform.h" #include "parquet/types.h" +#include "parquet/parquet_types.h" +#include "parquet/bloom_filter.h" namespace arrow { @@ -99,11 +101,19 @@ class PARQUET_EXPORT PageWriter { virtual int64_t WriteDataPage(const DataPage& page) = 0; + virtual int64_t WriteDataPagesWithIndex(const CompressedDataPage& data_page, format::PageLocation& ploc) = 0; + + virtual void WritePageBloomFilter(BlockSplitBloomFilter& bl_page_filter, int64_t& file_pos) = 0; + + virtual void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) = 0; + virtual int64_t WriteDictionaryPage(const DictionaryPage& page) = 0; virtual bool has_compressor() = 0; virtual void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) = 0; + + int64_t current_page_row_set_index; }; static constexpr int WRITE_BATCH_SIZE = 1000; @@ -119,6 +129,12 @@ class PARQUET_EXPORT ColumnWriter { /// \return Total size of the column in bytes virtual int64_t Close() = 0; + virtual int64_t CloseWithIndex() = 0; + + virtual void WriteIndex(int64_t file_pos_, int64_t ci_offset, int64_t oi_offset) = 0; + + virtual void WriteBloomFilterOffset(int64_t& file_pos) = 0; + /// \brief The physical Parquet type of the column virtual Type::type type() const = 0; @@ -170,7 +186,13 @@ class TypedColumnWriter : public ColumnWriter { // It can be smaller than `num_values` is there are some undefined values. virtual int64_t WriteBatch(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const T* values) = 0; - + virtual void WriteBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values) = 0; + + // Write a batch of repetition levels, definition levels, and values to the + // column. + virtual void WriteBatchWithIndex(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index = false) = 0; /// Write a batch of repetition levels, definition levels, and values to the /// column. /// @@ -203,6 +225,7 @@ class TypedColumnWriter : public ColumnWriter { // Estimated size of the values that are not written to a page yet virtual int64_t EstimatedBufferedValueBytes() const = 0; + }; using BoolWriter = TypedColumnWriter; diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 67f211b29c7..8cf0ae38504 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -42,6 +42,11 @@ #include "parquet/properties.h" #include "parquet/schema.h" #include "parquet/types.h" +<<<<<<< HEAD +======= +#include +#include +>>>>>>> cc47998e9... 
high precision time namespace parquet { @@ -49,6 +54,9 @@ namespace parquet { static constexpr int64_t kDefaultFooterReadSize = 64 * 1024; static constexpr uint32_t kFooterSize = 8; +static constexpr uint32_t kColumnIndexReadSize = 16*1024; +static constexpr uint32_t kOffsetIndexReadSize = 16*1024; + // For PARQUET-816 static constexpr int64_t kMaxDictHeaderSize = 100; @@ -83,9 +91,47 @@ std::unique_ptr RowGroupReader::GetColumnPageReader(int i) { return contents_->GetColumnPageReader(i); } + +std::unique_ptr RowGroupReader::GetColumnPageReaderWithIndex(int i,void* predicate, int64_t& min_index, + int predicate_col, int64_t& row_index,Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index, + parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index, BlockSplitBloomFilter& blf, + bool& first_time_blf,bool& first_time_index, + float& blf_load_time, float& index_load_time) { + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; + return contents_->GetColumnPageReaderWithIndex(i,predicate, min_index, predicate_col, row_index,type_num, with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_min_index, unsorted_row_index, col_index, offset_index, blf, first_time_blf,first_time_index, + blf_load_time, index_load_time); +} + +std::shared_ptr RowGroupReader::ColumnWithIndex(int i,void* predicate, int64_t& min_index, int predicate_col, + int64_t& row_index,Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index) { + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; + const ColumnDescriptor* descr = metadata()->schema()->Column(i); + + std::unique_ptr page_reader = contents_->GetColumnPageReaderWithIndex(i,predicate, min_index, predicate_col, row_index,type_num, with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_min_index, unsorted_row_index, col_index,offset_index,blf,first_time_blf,first_time_index, + blf_load_time, index_load_time); + return ColumnReader::Make( + descr, std::move(page_reader), + const_cast(contents_->properties())->memory_pool()); +} + // Returns the rowgroup metadata const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); } +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD /// Compute the section of the file that should be read for the given /// row group and column chunk. arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata, @@ -115,7 +161,15 @@ arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata, return {col_start, col_length}; } - +======= +uint64_t page_offset=0,num_values=0,next_page_offset=0; +>>>>>>> 10d3ed008... setup for file offset +======= +uint64_t page_offset,num_values,next_page_offset; +>>>>>>> c0a9bb12f... default setup + +======= +>>>>>>> c0ee60adc... 
generic reader // RowGroupReader::Contents implementation for the Parquet file specification class SerializedRowGroup : public RowGroupReader::Contents { public: @@ -138,6 +192,12 @@ class SerializedRowGroup : public RowGroupReader::Contents { const ReaderProperties* properties() const override { return &properties_; } +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD std::unique_ptr GetColumnPageReader(int i) override { // Read column chunk from the file auto col = row_group_metadata_->ColumnChunk(i); @@ -153,7 +213,1053 @@ class SerializedRowGroup : public RowGroupReader::Contents { } else { stream = properties_.GetStream(source_, col_range.offset, col_range.length); } + void SkipPages(long int v) const { + + void GoToPage(int64_t v, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index, uint64_t& page_offset,uint64_t& num_values,uint64_t& next_page_offset) const { +======= + void GoToPage(int64_t v, int64_t default_start, int64_t default_next_page_offset, int64_t default_num_values,parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index, uint64_t& page_offset,uint64_t& num_values,uint64_t& next_page_offset) const { +>>>>>>> c0a9bb12f... default setup + std::vector::size_type itemindex = 0; +======= + + void GetRowRangeForPage(uint64_t& row_group_index, parquet::format::OffsetIndex offset_index, uint64_t page_idx, uint64_t& row_range_start, uint64_t& row_range_end) { + const auto& page_locations = offset_index.page_locations; + DCHECK(page_idx < page_locations.size()) << "The page start index " << page_idx << " is greater than last page" << page_locations.size(); + row_range_start = page_locations[page_idx].first_row_index; + if (page_idx == page_locations.size() - 1) { + row_range_end = row_range_end - row_range_start - 1; + } else { + row_range_end = page_locations[page_idx + 1].first_row_index - 1; + } + } + +======= +>>>>>>> c0ee60adc... generic reader + void GetPageIndex(int64_t v, int64_t& min_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index) const { +<<<<<<< HEAD +// std::vector::size_type itemindex = 0; +>>>>>>> b2788ebb5... added one page check + //std::vector min_vec = std::vector>(col_index.min_values.begin(), col_index.min_values.end()); + int64_t min_diff = std::numeric_limits::max();//std::lower_bound(min_vec.begin(),min_vec.end(),v); +======= +>>>>>>> e33dd0dac... changed binary search + + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int64_t* page_min = (int64_t*)(void *)col_index.min_values[itemindex].c_str(); + int64_t* page_max = (int64_t*)(void *)col_index.max_values[itemindex].c_str(); + int64_t max_diff = *page_max - *page_min; +======= + void GetPageIndex(void* predicate, int64_t& min_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,Type::type type_num) const { +======= + void GetPageIndex(void* predicate, int64_t& min_index,int64_t& row_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,Type::type type_num) const { +<<<<<<< HEAD + int64_t min_diff = std::numeric_limits::max(); +>>>>>>> 03bef468a... fixed bsearch;order by parquet file;skip pages +======= +>>>>>>> 20660f098... 
experiments +======= + bool isSorted(parquet::format::ColumnIndex col_index,parquet::format::OffsetIndex offset_index,Type::type type_num) const { + bool sorted = false; + switch(type_num) { + case Type::BOOLEAN:{ + + break; + } + case Type::INT32:{ + int32_t* page_min_prev = (int32_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + int32_t* page_min_curr = (int32_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::INT64:{ + int64_t* page_min_prev = (int64_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + int64_t* page_min_curr = (int64_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::INT96:{ + uint32_t* page_min_prev = (uint32_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + uint32_t* page_min_curr = (uint32_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::FLOAT:{ + float* page_min_prev = (float*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + float* page_min_curr = (float*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::DOUBLE:{ + double* page_min_prev = (double*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + double* page_min_curr = (double*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::BYTE_ARRAY:{ + char* page_min_prev = (char*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + char* page_min_curr = (char*)(void*)col_index.min_values[itemindex].c_str(); + if ( strcmp(page_min_prev,page_min_curr) <= 0 ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + char* page_min_prev = (char*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + char* page_min_curr = (char*)(void*)col_index.min_values[itemindex].c_str(); + if ( strcmp(page_min_prev,page_min_curr) <= 0 ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + default:{ + break; + } + } + return sorted; + } + + void page_bloom_filter_has_value(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, format::OffsetIndex& offset_index + , int64_t& min_index, Type::type type_num, int64_t& row_index) const { + int64_t blf_offset = offset_index.page_bloom_filter_offsets[min_index]; + std::shared_ptr stream_ = 
properties_.GetStream(source_, blf_offset,BloomFilter::kMaximumBloomFilterBytes); + BlockSplitBloomFilter page_blf = BlockSplitBloomFilter::Deserialize(stream_.get()); + row_index = offset_index.page_locations[min_index].first_row_index; + switch(type_num) { + case Type::BOOLEAN:{ + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) row_index = -1; + break; + } + case Type::INT64:{ + int64_t v = *((int64_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) row_index = -1; + break; + } + case Type::INT96:{ + uint32_t v = *((uint32_t*) predicate); + break; + } + case Type::FLOAT:{ + float v = *((float*) predicate); + if (!page_blf.FindHash(page_blf.Hash((float)(int64_t)v))) row_index = -1; + break; + } + case Type::DOUBLE:{ + double v = *((double*) predicate); + if (!page_blf.FindHash(page_blf.Hash((double)(int64_t)v))) row_index = -1; + break; + } + case Type::BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) row_index = -1; + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + char* v = (char*) predicate; + uint8_t ptr = *v; + ByteArray pba((uint32_t)strlen(v),&ptr); + if (!page_blf.FindHash(page_blf.Hash(&pba))) row_index = -1; + break; + } + default:{ + parquet::ParquetException::NYI("type reader not implemented"); + } + } + } + + + void page_bloom_filter_has_value(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, format::OffsetIndex& offset_index + , std::vector& unsorted_min_index, Type::type type_num, std::vector& unsorted_row_index) const { + + for ( int64_t min_index: unsorted_min_index) { + int64_t blf_offset = offset_index.page_bloom_filter_offsets[min_index]; + std::shared_ptr stream_ = properties_.GetStream(source_, blf_offset,BloomFilter::kMaximumBloomFilterBytes); + BlockSplitBloomFilter page_blf = BlockSplitBloomFilter::Deserialize(stream_.get()); + unsorted_row_index.push_back(offset_index.page_locations[min_index].first_row_index); + switch(type_num) { + case Type::BOOLEAN:{ + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) unsorted_row_index.pop_back(); + break; + } + case Type::INT64:{ + int64_t v = *((int64_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) unsorted_row_index.pop_back(); + break; + } + case Type::INT96:{ + uint32_t v = *((uint32_t*) predicate); + break; + } + case Type::FLOAT:{ + float v = *((float*) predicate); + if (!page_blf.FindHash(page_blf.Hash((float)(int64_t)v))) unsorted_row_index.pop_back(); + break; + } + case Type::DOUBLE:{ + double v = *((double*) predicate); + if (!page_blf.FindHash(page_blf.Hash((double)(int64_t)v))) unsorted_row_index.pop_back(); + break; + } + case Type::BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) 
unsorted_row_index.pop_back(); + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) unsorted_row_index.pop_back(); + break; + } + default:{ + parquet::ParquetException::NYI("type reader not implemented"); + } + } + } + } + + void GetPageIndex(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, + int64_t& min_index,int64_t& row_index, parquet::format::ColumnIndex col_index, + parquet::format::OffsetIndex offset_index,Type::type type_num, bool sorted, + bool with_binarysearch, int64_t& count_pages_scanned, +<<<<<<< HEAD + parquet::BlockSplitBloomFilter& blf, bool with_bloom_filter, bool with_page_bf) const { +<<<<<<< HEAD + bool sorted = isSorted(col_index,offset_index,type_num); +>>>>>>> 77931bb15... use binary search +======= +======= + bool with_bloom_filter, bool with_page_bf) const { +>>>>>>> dcf50b2dd... PARQUET-1327-bloom-filter-read-write-implementation-separate-calls + +>>>>>>> 08c315bf2... unsorted rows + switch(type_num) { + case Type::BOOLEAN:{ + // doesn't make sense for bool + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + + + + if(sorted && with_binarysearch){ + if(col_index.min_values.size() >= 2){ + uint64_t last_index = col_index.min_values.size()-1; + uint64_t begin_index = 0; + uint64_t itemindex = (begin_index + last_index)/2; + + while(begin_index <= last_index) { + itemindex = (begin_index + last_index)/2; + int32_t* page_min_curr = (int32_t*)col_index.min_values[itemindex].c_str(); + + if ( v < *page_min_curr ){ + last_index -= 1; + count_pages_scanned++; + continue; + } + if ( itemindex < last_index ){ + int32_t* page_min_next = (int32_t*)col_index.min_values[itemindex+1].c_str(); + if ( v > *page_min_next ){ + begin_index += 1; + count_pages_scanned++; + } + if ( v < *page_min_next && v > *page_min_curr ){ + begin_index = last_index + 1; + count_pages_scanned++; + } + }else { + begin_index = last_index + 1; + count_pages_scanned++; + } + } + min_index = itemindex; + } + else + { + min_index = 0; + } + } + else{ + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int32_t* page_min = (int32_t*)(void *)col_index.min_values[itemindex].c_str(); + int32_t* page_max = (int32_t*)(void *)col_index.max_values[itemindex].c_str(); + int32_t max_diff = *page_max - *page_min; + + if ( *page_min <= v && v <= *page_max ) { + min_index = itemindex; + } + count_pages_scanned = itemindex; + } + min_index = (count_pages_scanned == ((int)offset_index.page_locations.size()-1) && min_index == -1)? count_pages_scanned:min_index; + } + break; + } + case Type::INT64: + { + int64_t v = *((int64_t*) predicate); +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD + + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int64_t* page_min = (int64_t*)(void *)col_index.min_values[itemindex].c_str(); + int64_t* page_max = (int64_t*)(void *)col_index.max_values[itemindex].c_str(); +<<<<<<< HEAD +<<<<<<< HEAD + int64_t max_diff = *page_max - *page_min; +>>>>>>> 6eef203fa... generic predicate +======= + int64_t diff = *page_max - v; +>>>>>>> 03bef468a... 
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            // Binary search for the last page whose minimum is <= v.
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const int64_t* page_min_curr = (const int64_t*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const int64_t* page_min_next = (const int64_t*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;  // *page_min_curr <= v and v is below the next page's minimum
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const int64_t* page_min = (const int64_t*)col_index.min_values[itemindex].c_str();
+            const int64_t* page_max = (const int64_t*)col_index.max_values[itemindex].c_str();
+            if (*page_min <= v && v <= *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::INT96: {
+        // Note: only the first four bytes of an INT96 value take part in the comparison.
+        uint32_t v = *((uint32_t*)predicate);
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const uint32_t* page_min_curr = (const uint32_t*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const uint32_t* page_min_next = (const uint32_t*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const uint32_t* page_min = (const uint32_t*)col_index.min_values[itemindex].c_str();
+            const uint32_t* page_max = (const uint32_t*)col_index.max_values[itemindex].c_str();
+            if (*page_min <= v && v <= *page_max) {
+              min_index = itemindex;
+              count_pages_scanned = itemindex;
+            }
+          }
+        }
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const float* page_min_curr = (const float*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const float* page_min_next = (const float*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const float* page_min = (const float*)col_index.min_values[itemindex].c_str();
+            const float* page_max = (const float*)col_index.max_values[itemindex].c_str();
+            if (*page_min < v && v < *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const double* page_min_curr = (const double*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const double* page_min_next = (const double*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const double* page_min = (const double*)col_index.min_values[itemindex].c_str();
+            const double* page_max = (const double*)col_index.max_values[itemindex].c_str();
+            if (*page_min < v && v < *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        char* p = (char*)predicate;
+        // Remove leading zeroes in the predicate, if present, then left-pad it
+        // with '0' to FIXED_LENGTH so it compares lexicographically against the
+        // zero-padded page statistics.
+        int checkzero = 0;
+        while (p[checkzero] == '0') checkzero++;
+        p = p + checkzero;
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const std::string& page_min_curr = col_index.min_values[itemindex];
+              count_pages_scanned++;
+              if (test.compare(page_min_curr) < 0) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const std::string& page_min_next = col_index.min_values[itemindex + 1];
+                if (test.compare(page_min_next) >= 0) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const std::string& page_min = col_index.min_values[itemindex];
+            const std::string& page_max = col_index.max_values[itemindex];
+            if (test.compare(page_min) > 0 && test.compare(page_max) < 0) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        char* v = (char*)predicate;
+        std::string str(v);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const std::string& page_min_curr = col_index.min_values[itemindex];
+              count_pages_scanned++;
+              if (str.compare(page_min_curr) < 0) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const std::string& page_min_next = col_index.min_values[itemindex + 1];
+                if (str.compare(page_min_next) >= 0) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const std::string& page_min = col_index.min_values[itemindex];
+            const std::string& page_max = col_index.max_values[itemindex];
+            if (str.compare(page_min) > 0 && str.compare(page_max) < 0) {
+              min_index = itemindex;
+              count_pages_scanned = itemindex;
+            }
+          }
+        }
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    if (with_page_bf)
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index, min_index, type_num, row_index);
+    else if (min_index >= 0)
+      row_index = offset_index.page_locations[min_index].first_row_index;
+    else
+      row_index = -1;  // no candidate page
+  }
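Taken together, the sorted path above is a lower-bound search over the column index minimums. A minimal standalone sketch of the same idea, assuming the page minimums have already been decoded into native integers (the example above instead reinterprets the raw thrift strings in place):

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical helper: find the last page whose minimum is <= v, or -1 if
// v precedes every page minimum.
int64_t FindCandidatePage(const std::vector<int64_t>& page_minimums, int64_t v) {
  auto it = std::upper_bound(page_minimums.begin(), page_minimums.end(), v);
  if (it == page_minimums.begin()) return -1;  // v is smaller than every page minimum
  return static_cast<int64_t>(std::distance(page_minimums.begin(), it)) - 1;
}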
+  void GetPageIndex(std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_, void* predicate,
+                    std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+                    parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,
+                    Type::type type_num, bool sorted, bool with_binarysearch, int64_t& count_pages_scanned,
+                    bool with_bloom_filter, bool with_page_bf) const {
+    // Unsorted variant: every page whose [min, max] range may contain the
+    // predicate is kept as a candidate.
+    switch (type_num) {
+      case Type::BOOLEAN: {
+        // Min/max page skipping makes little sense for booleans.
+        break;
+      }
+      case Type::INT32: {
+        int32_t v = *((int32_t*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const int32_t* page_min = (const int32_t*)col_index.min_values[itemindex].c_str();
+          const int32_t* page_max = (const int32_t*)col_index.max_values[itemindex].c_str();
+          if (*page_min <= v && v <= *page_max) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::INT64: {
+        int64_t v = *((int64_t*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const int64_t* page_min = (const int64_t*)col_index.min_values[itemindex].c_str();
+          const int64_t* page_max = (const int64_t*)col_index.max_values[itemindex].c_str();
+          if (*page_min <= v && v <= *page_max) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::INT96: {
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const float* page_min = (const float*)col_index.min_values[itemindex].c_str();
+          const float* page_max = (const float*)col_index.max_values[itemindex].c_str();
+          // v lies inside [min, max] iff |max - min| == |v - min| + |max - v|;
+          // compare within a tolerance to absorb floating-point error.
+          auto epsilon = std::numeric_limits<float>::epsilon();
+          float error_factor = 9 * pow(10, 15);
+          float max_diff = *page_max - *page_min;
+          if (fabs(max_diff - (fabs(v - *page_min) + fabs(*page_max - v))) <= error_factor * epsilon) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const double* page_min = (const double*)col_index.min_values[itemindex].c_str();
+          const double* page_max = (const double*)col_index.max_values[itemindex].c_str();
+          double max_diff = *page_max - *page_min;
+          auto epsilon = std::numeric_limits<double>::epsilon();
+          double error_factor = 9 * pow(10, 15);
+          if (fabs(max_diff - (fabs(v - *page_min) + fabs(*page_max - v))) <= error_factor * epsilon) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        const char* p = (const char*)predicate;
+        // Left-pad the predicate with '0' to FIXED_LENGTH so it compares
+        // lexicographically against the zero-padded page statistics.
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const std::string& page_min = col_index.min_values[itemindex];
+          const std::string& page_max = col_index.max_values[itemindex];
+          if (test.compare(page_min) > 0 && test.compare(page_max) < 0) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    if (with_page_bf) {
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index,
+                                  unsorted_min_index, type_num, unsorted_row_index);
+    } else {
+      for (int64_t min_index : unsorted_min_index)
+        unsorted_row_index.push_back(offset_index.page_locations[min_index].first_row_index);
+    }
+  }
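The unsorted path cannot prune by ordering, so it degenerates to an interval-overlap scan. A standalone sketch under the same simplifying assumption of pre-decoded native statistics:

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the unsorted path: every page whose
// [min, max] range contains v stays a candidate.
std::vector<size_t> CollectCandidatePages(const std::vector<int64_t>& page_min,
                                          const std::vector<int64_t>& page_max,
                                          int64_t v) {
  std::vector<size_t> candidates;
  for (size_t i = 0; i < page_min.size(); ++i) {
    if (page_min[i] <= v && v <= page_max[i]) candidates.push_back(i);
  }
  return candidates;
}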
+  void GetPageWithoutIndex(std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_,
+                           void* predicate, int64_t& min_index, int64_t& row_index, Type::type type_num,
+                           bool with_binarysearch, int64_t& count_pages_scanned,
+                           parquet::BlockSplitBloomFilter& blf, bool with_bloom_filter, bool with_page_bf) const {
+    // Without a page index the only pruning available is the column-chunk
+    // bloom filter: a negative membership test rules the whole chunk out.
+    switch (type_num) {
+      case Type::BOOLEAN: {
+        break;
+      }
+      case Type::INT32: {
+        int32_t v = *((int32_t*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::INT64: {
+        int64_t v = *((int64_t*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::INT96: {
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+        // The writer hashes float values truncated to whole numbers; mirror that here.
+        if (with_bloom_filter && !blf.FindHash(blf.Hash((float)(int64_t)v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash((double)(int64_t)v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        char* p = (char*)predicate;
+        // Remove leading zeroes in the predicate, if present, then left-pad it
+        // back to FIXED_LENGTH to match the stored representation.
+        int checkzero = 0;
+        while (p[checkzero] == '0') checkzero++;
+        p = p + checkzero;
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+        ByteArray pba(test.size(), reinterpret_cast<const uint8_t*>(test.c_str()));
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(&pba))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        char* v = (char*)predicate;
+        ByteArray pba((uint32_t)strlen(v), reinterpret_cast<const uint8_t*>(v));
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(&pba))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    /*if (with_page_bf)
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index, min_index, type_num, row_index);*/
+  }
+
+  void GetPageWithRowIndex(int64_t& page_index, parquet::format::OffsetIndex offset_index,
+                           int64_t& row_index) const {
+    // Map a row index back to the page that starts at it.
+    for (uint64_t i = 0; i < offset_index.page_locations.size(); i++) {
+      if (offset_index.page_locations[i].first_row_index == row_index) {
+        page_index = i;
+        break;
+      }
+    }
+  }
+
+  /// ---- Page filtering ----
+  /// A Parquet file can contain a so-called "page index". It has two parts: a column
+  /// index and an offset index. The column index contains statistics, such as minimum
+  /// and maximum values, for each page. The offset index describes page locations in
+  /// the Parquet file and top-level row ranges. The reader evaluates the min/max
+  /// conjuncts against the column index and determines the surviving pages with the
+  /// help of the offset index. It then configures the column readers to scan only the
+  /// pages and row ranges that have a chance of storing rows that pass the conjuncts.
+
+  bool HasPageIndex(ColumnChunkMetaData* col) {
+    int64_t column_index_offset = col->column_index_offset();
+    int64_t offset_index_offset = col->offset_index_offset();
+    int64_t column_index_length = col->column_index_length();
+    int64_t offset_index_length = col->offset_index_length();
+
+    int64_t ci_start = std::numeric_limits<int64_t>::max();
+    int64_t oi_start = std::numeric_limits<int64_t>::max();
+    int64_t ci_end = -1;
+    int64_t oi_end = -1;
+
+    if (column_index_offset && column_index_length) {
+      ci_start = std::min(ci_start, column_index_offset);
+      ci_end = std::max(ci_end, column_index_offset + column_index_length);
+    }
+    if (offset_index_offset && offset_index_length) {
+      oi_start = std::min(oi_start, offset_index_offset);
+      oi_end = std::max(oi_end, offset_index_offset + offset_index_length);
+    }
+    return oi_end != -1 && ci_end != -1;
+  }
+
   std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = col->crypto_metadata();
   // Column is encrypted only if crypto_metadata exists.
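The doc comment above describes turning surviving pages into row ranges via the offset index. An illustrative sketch of that idea, under assumed simplified types (RowRange and the decoded first_row_index vector are not part of the patch):

#include <cstdint>
#include <vector>

struct RowRange {
  int64_t first_row;  // inclusive
  int64_t last_row;   // exclusive
};

// Each surviving page ordinal i maps to the half-open row range
// [first_row_index[i], first_row_index[i + 1]), the last page ending at num_rows.
std::vector<RowRange> SurvivingRowRanges(const std::vector<int64_t>& first_row_index,
                                         int64_t num_rows,
                                         const std::vector<size_t>& surviving_pages) {
  std::vector<RowRange> ranges;
  for (size_t page : surviving_pages) {
    int64_t begin = first_row_index[page];
    int64_t end = (page + 1 < first_row_index.size()) ? first_row_index[page + 1] : num_rows;
    ranges.push_back({begin, end});
  }
  return ranges;
}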
@@ -166,9 +1272,70 @@ class SerializedRowGroup : public RowGroupReader::Contents {
       throw ParquetException("RowGroup is noted as encrypted but no file decryptor");
     }
 
+  void DeserializeColumnIndex(const ColumnChunkMetaData& col_chunk, parquet::format::ColumnIndex* column_index,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t ci_start = col_chunk.column_index_offset();
+    uint32_t length = col_chunk.column_index_length();
+    string_view page_buffer;
+
+    std::shared_ptr<ArrowInputStream> stream_ = properties_.GetStream(source_, ci_start, length);
+    PARQUET_THROW_NOT_OK(stream_->Peek(kColumnIndexReadSize, &page_buffer));
+    if (page_buffer.size() == 0) {
+      return;
+    }
+
+    DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(page_buffer.data()), &length, column_index);
+  }
+
+  void DeserializeOffsetIndex(const ColumnChunkMetaData& col_chunk, parquet::format::OffsetIndex* offset_index,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t oi_start = col_chunk.offset_index_offset();
+    uint32_t length = col_chunk.offset_index_length();
+    string_view page_buffer;
+
+    std::shared_ptr<ArrowInputStream> stream_ = properties_.GetStream(source_, oi_start, length);
+    PARQUET_THROW_NOT_OK(stream_->Peek(kOffsetIndexReadSize, &page_buffer));
+    if (page_buffer.size() == 0) {
+      return;
+    }
+
+    DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(page_buffer.data()), &length, offset_index);
+  }
+
+  void DeserializeBloomFilter(const ColumnChunkMetaData& col_chunk, parquet::BlockSplitBloomFilter& blf,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t blf_offset = col_chunk.bloom_filter_offset();
+    std::shared_ptr<ArrowInputStream> stream_ =
+        properties_.GetStream(source_, blf_offset, BloomFilter::kMaximumBloomFilterBytes);
+    blf = BlockSplitBloomFilter::Deserialize(stream_.get());
+  }
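All three helpers follow the same pattern: seek to the offset recorded in the column-chunk metadata and deserialize what is found there. If Peek on a stream is undesirable, the bytes can equally be read directly; a sketch using Arrow's pre-1.0 out-parameter ReadAt API (the helper name is illustrative):

#include <memory>
#include "arrow/buffer.h"
#include "arrow/io/interfaces.h"
#include "parquet/exception.h"

// Read exactly `length` bytes starting at `offset` from a random-access file.
std::shared_ptr<arrow::Buffer> ReadIndexBytes(
    const std::shared_ptr<arrow::io::RandomAccessFile>& file,
    int64_t offset, int64_t length) {
  std::shared_ptr<arrow::Buffer> out;
  PARQUET_THROW_NOT_OK(file->ReadAt(offset, length, &out));
  return out;
}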
+  std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+      int column_index, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool with_binarysearch, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+      parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+      BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+      float& blf_load_time, float& index_load_time) {
+    // Read column chunk from the file
+    auto col = row_group_metadata_->ColumnChunk(column_index);
+
+    auto sorting_columns = row_group_metadata_->sorting_columns();
+
+    int64_t col_start = col->data_page_offset();
+    if (col->has_dictionary_page() && col->dictionary_page_offset() > 0 &&
+        col_start > col->dictionary_page_offset()) {
+      col_start = col->dictionary_page_offset();
+    }
+
+    int64_t col_length = col->total_compressed_size();
+
+    if (with_bloom_filter) {
+      if (first_time_blf) {
+        auto start_time = std::chrono::high_resolution_clock::now();
+        DeserializeBloomFilter(*col, blf, source_, properties_);
+        auto end_time = std::chrono::high_resolution_clock::now();
+        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
+        first_time_blf = false;
+        blf_load_time = (float)duration.count();
+      }
+
+      GetPageWithoutIndex(source_, properties_, predicate, min_index, row_index, type_num,
+                          with_binarysearch, count_pages_scanned, blf, with_bloom_filter, with_page_bf);
+    }
+
+    if (row_index != -1 && with_index) {
+      bool has_page_index = HasPageIndex(col.get());
+      if (has_page_index) {
+        if (first_time_index) {
+          auto start_time = std::chrono::high_resolution_clock::now();
+          DeserializeColumnIndex(*col, &col_index, source_, properties_);
+          DeserializeOffsetIndex(*col, &offset_index, source_, properties_);
+          auto end_time = std::chrono::high_resolution_clock::now();
+          auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
+          index_load_time = (float)duration.count();
+          first_time_index = false;
+        }
+
+        total_num_pages = offset_index.page_locations.size();
+        last_first_row = offset_index.page_locations.back().first_row_index;
+        if (predicate_col == column_index) {
+          bool sorted = isSorted(col_index, offset_index, type_num);
+          if (sorted)
+            GetPageIndex(source_, properties_, predicate, min_index, row_index, col_index,
+                         offset_index, type_num, sorted, with_binarysearch, count_pages_scanned,
+                         with_bloom_filter, with_page_bf);
+          else
+            GetPageIndex(source_, properties_, predicate, unsorted_min_index, unsorted_row_index,
+                         col_index, offset_index, type_num, sorted, with_binarysearch,
+                         count_pages_scanned, with_bloom_filter, with_page_bf);
+        } else {
+          GetPageWithRowIndex(min_index, offset_index, row_index);
+        }
+      }
+    }
+
+    // PARQUET-816 workaround for old files created by older parquet-mr
+    const ApplicationVersion& version = file_metadata_->writer_version();
+    if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
+      // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
+      // dictionary page header size in total_compressed_size and total_uncompressed_size
+      // (see IMPALA-694). We add padding to compensate.
+      int64_t size = -1;
+      PARQUET_THROW_NOT_OK(source_->GetSize(&size));
+      int64_t bytes_remaining = size - (col_start + col_length);
+      int64_t padding = std::min(kMaxDictHeaderSize, bytes_remaining);
+      col_length += padding;
+    }
+
+    std::shared_ptr<ArrowInputStream> stream =
+        properties_.GetStream(source_, col_start, col_length);
+
+    return PageReader::Open(stream, col->num_values(), col->compression(),
+                            properties_.memory_pool());
+  }
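The function above times the bloom filter and page-index loads with std::chrono. A minimal sketch of that pattern, factored out for reuse; note the subtraction order (end minus start), since reversing it yields a negative duration:

#include <chrono>

// Measure the elapsed wall time of a callable, in microseconds.
template <typename Fn>
float TimeMicros(Fn&& fn) {
  auto start_time = std::chrono::high_resolution_clock::now();
  fn();
  auto end_time = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
  return static_cast<float>(duration.count());
}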
+  std::unique_ptr<PageReader> GetColumnPageReader(int i) override {
+    // Read column chunk from the file
+    auto col = row_group_metadata_->ColumnChunk(i);
+
+    int64_t col_start = col->data_page_offset();
+    if (col->has_dictionary_page() && col->dictionary_page_offset() > 0 &&
+        col_start > col->dictionary_page_offset()) {
+      col_start = col->dictionary_page_offset();
+    }
+
+    int64_t col_length = col->total_compressed_size();
+
+    bool has_page_index = HasPageIndex(col.get());
+    if (has_page_index) {
+      parquet::format::ColumnIndex col_index;
+      parquet::format::OffsetIndex offset_index;
+      DeserializeColumnIndex(*col, &col_index, source_, properties_);
+      DeserializeOffsetIndex(*col, &offset_index, source_, properties_);
+    }
+
+    // PARQUET-816 workaround for old files created by older parquet-mr
+    const ApplicationVersion& version = file_metadata_->writer_version();
+    if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
+      // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
+      // dictionary page header size in total_compressed_size and total_uncompressed_size
+      // (see IMPALA-694). We add padding to compensate.
+      int64_t size = -1;
+      PARQUET_THROW_NOT_OK(source_->GetSize(&size));
+      int64_t bytes_remaining = size - (col_start + col_length);
+      int64_t padding = std::min(kMaxDictHeaderSize, bytes_remaining);
+      col_length += padding;
+    }
+
+    std::shared_ptr<ArrowInputStream> stream =
+        properties_.GetStream(source_, col_start, col_length);
+    return PageReader::Open(stream, col->num_values(), col->compression(),
+                            properties_.memory_pool());
+  }
 
  private:
diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h
index 79af3cd2b35..a85da6d28dc 100644
--- a/cpp/src/parquet/file_reader.h
+++ b/cpp/src/parquet/file_reader.h
@@ -26,6 +26,7 @@
 #include "parquet/metadata.h"  // IWYU pragma: keep
 #include "parquet/platform.h"
 #include "parquet/properties.h"
+#include "parquet/bloom_filter.h"
 
 namespace parquet {
@@ -35,6 +36,8 @@ class PageReader;
 class RandomAccessSource;
 class RowGroupMetaData;
 
+static const uint32_t FIXED_LENGTH = 124;
+
 class PARQUET_EXPORT RowGroupReader {
  public:
   // Forward declare a virtual class 'Contents' to aid dependency injection and more
@@ -43,6 +46,12 @@ class PARQUET_EXPORT RowGroupReader {
   struct Contents {
     virtual ~Contents() {}
     virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
+    virtual std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+        int i, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+        Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+        int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+        std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+        parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+        BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+        float& blf_load_time, float& index_load_time) = 0;
     virtual const RowGroupMetaData* metadata() const = 0;
     virtual const ReaderProperties* properties() const = 0;
   };
@@ -56,11 +65,33 @@ class PARQUET_EXPORT RowGroupReader {
   // column. Ownership is shared with the RowGroupReader.
   std::shared_ptr<ColumnReader> Column(int i);
 
+  std::shared_ptr<ColumnReader> ColumnWithIndex(
+      int i, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index);
+
   std::unique_ptr<PageReader> GetColumnPageReader(int i);
+  std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+      int column_index, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+      parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+      BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+      float& blf_load_time, float& index_load_time);
+
+  float GetBLFLoadTime() { return blf_load_time; }
+
+  float GetIndexLoadTime() { return index_load_time; }
+
  private:
   // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
+  parquet::format::ColumnIndex col_index;
+  parquet::format::OffsetIndex offset_index;
+  BlockSplitBloomFilter blf;
+  bool first_time_blf = true;
+  bool first_time_index = true;
+  float blf_load_time = 0.0;
+  float index_load_time = 0.0;
 };
 
 class PARQUET_EXPORT ParquetFileReader {
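A hypothetical caller flow for the ColumnWithIndex API declared above, assuming an int64 predicate column 0 in a file with a single row group; the file name and all parameter values are illustrative only:

#include <cstdint>
#include <memory>
#include <vector>
#include "parquet/file_reader.h"

void ExampleLookup() {
  int64_t predicate = 42;
  int64_t min_index = -1, row_index = 0, pages_scanned = 0;
  int64_t total_pages = 0, last_first_row = 0;
  std::vector<int64_t> unsorted_min_index, unsorted_row_index;

  std::unique_ptr<parquet::ParquetFileReader> reader =
      parquet::ParquetFileReader::OpenFile("example.parquet");
  std::shared_ptr<parquet::RowGroupReader> row_group = reader->RowGroup(0);
  std::shared_ptr<parquet::ColumnReader> col = row_group->ColumnWithIndex(
      0, &predicate, min_index, 0, row_index, parquet::Type::INT64,
      /*with_index=*/true, /*binary_search=*/true, pages_scanned,
      total_pages, last_first_row, /*with_bloom_filter=*/false,
      /*with_page_bf=*/false, unsorted_min_index, unsorted_row_index);
  // row_index now holds the first row of the page that may contain the value,
  // or -1 if every page was ruled out.
}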
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index 63dabe76c95..5435856e957 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -50,8 +50,52 @@ void RowGroupWriter::Close() {
   }
 }
 
+void RowGroupWriter::CloseWithIndex(bool use_index, bool with_bf) {
+  if (contents_) {
+    contents_->CloseWithIndex(use_index, with_bf);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(int32_t values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(int64_t values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(float values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(double values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(ByteArray* values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::InitBloomFilter(int num_rows, uint32_t& num_bytes, float fpp) {
+  if (contents_) {
+    contents_->InitBloomFilter(num_rows, num_bytes, fpp);
+  }
+}
+
 ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
 
+ColumnWriter* RowGroupWriter::NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) {
+  return contents_->NextColumnWithIndex(num_bytes, with_index, with_bf, fpp);
+}
+
 ColumnWriter* RowGroupWriter::column(int i) { return contents_->column(i); }
 
 int64_t RowGroupWriter::total_compressed_bytes() const {
@@ -93,7 +137,9 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
         next_column_index_(0),
         num_rows_(0),
         buffered_row_group_(buffered_row_group),
-        file_encryptor_(file_encryptor) {
+        file_encryptor_(file_encryptor),
+        blf_(metadata_->num_columns()) {
     if (buffered_row_group) {
       InitColumns();
     } else {
@@ -143,6 +189,49 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
     return column_writers_[0].get();
   }
 
+  ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) override {
+    use_index = true;
+    if (buffered_row_group_) {
+      throw ParquetException(
+          "NextColumn() is not supported when a RowGroup is written by size");
+    }
+
+    if (column_writers_[0]) {
+      CheckRowsWritten();
+    }
+
+    // Throws an error if more columns are being written
+    auto col_meta = metadata_->NextColumnChunk();
+
+    if (column_writers_[0]) {
+      total_bytes_written_ += (with_index) ? column_writers_[0]->CloseWithIndex()
+                                           : column_writers_[0]->Close();
+      sink_->Tell(&file_pos_);
+      if (with_index)
+        column_writers_[0]->WriteIndex(file_pos_, column_index_offset, offset_index_offset);
+    }
+
+    if (column_writers_[0] && with_bf) {
+      // Initialize the next column's bloom filter. OptimalNumOfBits returns
+      // bits while Init takes bytes, hence the division by eight.
+      num_bytes = blf_[next_column_index_].OptimalNumOfBits(column_writers_[0]->rows_written(), fpp) / 8;
+      blf_[next_column_index_].Init(num_bytes);
+
+      // Keep the current column writer around so its bloom filter offset can
+      // be patched in later.
+      all_used_cws_.push_back(column_writers_[0]);
+    }
+
+    ++next_column_index_;
+
+    const ColumnDescriptor* column_descr = col_meta->descr();
+    std::unique_ptr<PageWriter> pager =
+        PageWriter::Open(sink_, properties_->compression(column_descr->path()), col_meta,
+                         properties_->memory_pool());
+    column_writers_[0] = ColumnWriter::Make(col_meta, std::move(pager), properties_);
+
+    return column_writers_[0].get();
+  }
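The sizing logic NextColumnWithIndex applies per column can be exercised on its own. A sketch, assuming a populated value vector (the helper name and the bits-to-bytes conversion comment reflect the parquet API as I understand it):

#include <cstdint>
#include <vector>
#include "parquet/bloom_filter.h"

// Size a block-split bloom filter from the expected value count and target
// false-positive rate, then populate it.
void BuildColumnBloomFilter(parquet::BlockSplitBloomFilter* blf,
                            const std::vector<int64_t>& values, double fpp) {
  uint32_t num_bits = parquet::BlockSplitBloomFilter::OptimalNumOfBits(
      static_cast<uint32_t>(values.size()), fpp);
  blf->Init(num_bits / 8);  // Init takes bytes, OptimalNumOfBits returns bits
  for (int64_t v : values) blf->InsertHash(blf->Hash(v));
}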
+
   ColumnWriter* column(int i) override {
     if (!buffered_row_group_) {
       throw ParquetException(
@@ -185,18 +274,97 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
     for (size_t i = 0; i < column_writers_.size(); i++) {
       if (column_writers_[i]) {
         total_bytes_written_ += column_writers_[i]->Close();
-        column_writers_[i].reset();
       }
     }
+    column_writers_.clear();
+
+    // Ensures all columns have been written
+    metadata_->set_num_rows(num_rows_);
+    metadata_->Finish(total_bytes_written_);
+  }
+
+  void CloseWithIndex(bool use_index, bool with_bf) override {
+    if (!closed_) {
+      closed_ = true;
+      CheckRowsWritten();
+      for (size_t i = 0; i < column_writers_.size(); i++) {
+        if (column_writers_[i]) {
+          total_bytes_written_ += (!use_index) ? column_writers_[i]->Close()
+                                               : column_writers_[i]->CloseWithIndex();
+          if (use_index)
+            column_writers_[i]->WriteIndex(0, column_index_offset, offset_index_offset);
+
+          all_used_cws_.push_back(column_writers_[i]);
+          column_writers_[i].reset();
+        }
+      }
+
+      if (with_bf)
+        WriteBloomFilterOffsets();
+
+      column_writers_.clear();
+
+      // Ensures all columns have been written
+      metadata_->set_num_rows(num_rows_);
+      metadata_->Finish(total_bytes_written_, row_group_ordinal_);
+    }
+  }
+
+  void AppendRowGroupBloomFilter(int32_t values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void AppendRowGroupBloomFilter(int64_t values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void AppendRowGroupBloomFilter(float values) override {
+    // Values are truncated to whole numbers before hashing; the reader applies
+    // the same truncation when probing.
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash((float)(int64_t)values));
+  }
+
+  void AppendRowGroupBloomFilter(double values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash((double)(int64_t)values));
+  }
+
+  void AppendRowGroupBloomFilter(ByteArray* values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void InitBloomFilter(int num_rows, uint32_t& num_bytes, float fpp) override {
+    // First-time initialization of the first column's filter; this cannot
+    // happen in NextColumnChunk. OptimalNumOfBits returns bits, Init takes bytes.
+    num_bytes = blf_[next_column_index_].OptimalNumOfBits(num_rows, fpp) / 8;
+    blf_[next_column_index_].Init(num_bytes);
+  }
+
+  void WriteBloomFilterOffsets() {
+    int64_t filepos;
+    for (size_t i = 0; i < all_used_cws_.size(); i++) {
+      sink_->Tell(&filepos);
+      if (all_used_cws_[i]) {
+        if (false) {  // Writing a BloomFilterHeader is disabled for now.
+          format::BloomFilterHeader blfh;
+          blfh.__set_numBytes(blf_[i].GetBitsetSize());
+          blfh.__set_hash(blf_[i].GetHashStrategy());
+          blfh.__set_algorithm(blf_[i].GetHashAlgorithm());
+          blfh.__set_compression(blf_[i].GetBFCompression());
+
+          std::unique_ptr<ThriftSerializer> thrift_serializer_;
+          thrift_serializer_.reset(new ThriftSerializer);
+          thrift_serializer_->Serialize(&blfh, sink_.get());
+        }
+
+        blf_[i].WriteTo(sink_.get());
+
+        all_used_cws_[i]->WriteBloomFilterOffset(filepos);
+        all_used_cws_[i].reset();
+      }
+    }
+  }
+
  private:
   std::shared_ptr<ArrowOutputStream> sink_;
   mutable RowGroupMetaDataBuilder* metadata_;
@@ -208,6 +376,8 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
   mutable int64_t num_rows_;
   bool buffered_row_group_;
   InternalFileEncryptor* file_encryptor_;
+  bool use_index = false;
 
   void CheckRowsWritten() const {
     // verify when only one column is written at a time
@@ -252,6 +422,11 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
   }
 
   std::vector<std::shared_ptr<ColumnWriter>> column_writers_;
+  std::vector<std::shared_ptr<ColumnWriter>> all_used_cws_;
+  int64_t column_index_offset = 0;
+  int64_t offset_index_offset = 0;
+  std::vector<BlockSplitBloomFilter> blf_;
+  int64_t file_pos_;
 };
 
 // ----------------------------------------------------------------------
@@ -296,6 +471,23 @@ class FileSerializer : public ParquetFileWriter::Contents {
     }
   }
 
+  void CloseWithIndex(bool use_index, bool with_bf) override {
+    if (is_open_) {
+      // If any functions here raise an exception, we set is_open_ to be false
+      // so that this does not get called again (possibly causing segfault)
+      is_open_ = false;
+      if (row_group_writer_) {
+        num_rows_ += row_group_writer_->num_rows();
+        row_group_writer_->CloseWithIndex(use_index, with_bf);
+      }
+      row_group_writer_.reset();
+
+      // Write magic bytes and metadata
+      file_metadata_ = metadata_->Finish();
+      WriteFileMetaData(*file_metadata_, sink_.get());
+    }
+  }
+
   int num_columns() const override { return schema_.num_columns(); }
 
   int num_row_groups() const override { return num_row_groups_; }
@@ -555,6 +747,14 @@ void ParquetFileWriter::Close() {
   }
 }
 
+void ParquetFileWriter::CloseWithIndex(bool use_index, bool with_bf) {
+  if (contents_) {
+    contents_->CloseWithIndex(use_index, with_bf);
+    file_metadata_ = contents_->metadata();
+    contents_.reset();
+  }
+}
+
 RowGroupWriter* ParquetFileWriter::AppendRowGroup() {
   return contents_->AppendRowGroup();
 }
@@ -567,6 +767,7 @@ RowGroupWriter* ParquetFileWriter::AppendRowGroup(int64_t num_rows) {
   return AppendRowGroup();
 }
 
 const std::shared_ptr<WriterProperties>& ParquetFileWriter::properties() const {
   return contents_->properties();
 }
diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h
index 8caa5efbab5..48d6f6e80b8 100644
--- a/cpp/src/parquet/file_writer.h
+++ b/cpp/src/parquet/file_writer.h
@@ -47,12 +47,29 @@ class PARQUET_EXPORT RowGroupWriter {
     // to be used only with ParquetFileWriter::AppendRowGroup
     virtual ColumnWriter* NextColumn() = 0;
+
+    virtual ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) = 0;
+
+    virtual void AppendRowGroupBloomFilter(int32_t values) = 0;
+    virtual void AppendRowGroupBloomFilter(int64_t values) = 0;
+    virtual void AppendRowGroupBloomFilter(float values) = 0;
+    virtual void AppendRowGroupBloomFilter(double values) = 0;
+    virtual void AppendRowGroupBloomFilter(ByteArray* values) = 0;
+
+    virtual void InitBloomFilter(int num_rows, uint32_t& blf_num_bits, float fpp) = 0;
+
     // to be used only with ParquetFileWriter::AppendBufferedRowGroup
     virtual ColumnWriter* column(int i) = 0;
 
     virtual int current_column() const = 0;
     virtual void Close() = 0;
+    virtual void CloseWithIndex(bool use_index, bool with_bf) = 0;
 
     // total bytes written by the page writer
     virtual int64_t total_bytes_written() const = 0;
     // total bytes still compressed but not written
@@ -69,10 +86,30 @@ class PARQUET_EXPORT RowGroupWriter {
   /// directly written to the sink, once a new column is started, the contents
   /// of the previous one cannot be modified anymore.
   ColumnWriter* NextColumn();
+
+  ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp);
+
+  void AppendRowGroupBloomFilter(int32_t values);
+  void AppendRowGroupBloomFilter(int64_t values);
+  void AppendRowGroupBloomFilter(float values);
+  void AppendRowGroupBloomFilter(double values);
+  void AppendRowGroupBloomFilter(ByteArray* values);
+
+  void InitBloomFilter(int num_rows, uint32_t& blf_num_bits, float fpp);
+
   /// Index of currently written column. Equal to -1 if NextColumn()
   /// has not been called yet.
   int current_column();
   void Close();
+  void CloseWithIndex(bool use_index, bool with_bf);
 
   int num_columns() const;
@@ -141,6 +178,8 @@ class PARQUET_EXPORT ParquetFileWriter {
     // Perform any cleanup associated with the file contents
     virtual void Close() = 0;
+    virtual void CloseWithIndex(bool use_index, bool with_bf) = 0;
+
     /// \note Deprecated since 1.3.0
     RowGroupWriter* AppendRowGroup(int64_t num_rows);
@@ -187,6 +226,8 @@ class PARQUET_EXPORT ParquetFileWriter {
   void Open(std::unique_ptr<Contents> contents);
   void Close();
+  void CloseWithIndex(bool use_index, bool with_bf);
+
   // Construct a RowGroupWriter for the indicated number of rows.
   //
   // Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
@@ -209,6 +250,7 @@ class PARQUET_EXPORT ParquetFileWriter {
   /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
   RowGroupWriter* AppendBufferedRowGroup();
 
   /// Number of columns.
   ///
   /// This number is fixed during the lifetime of the writer as it is determined via
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index cdfe505573f..c8e2bbdbdec 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -323,6 +323,14 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
     }
   }
 
+  inline int64_t column_index_length() const { return column_->column_index_length; }
+
+  inline int64_t bloom_filter_offset() const { return column_->meta_data.bloom_filter_offset; }
+
  private:
   mutable std::shared_ptr<Statistics> possible_stats_;
   std::vector<Encoding::type> encodings_;
@@ -417,6 +425,23 @@ std::unique_ptr<ColumnCryptoMetaData> ColumnChunkMetaData::crypto_metadata() const {
   return impl_->crypto_metadata();
 }
 
+int64_t ColumnChunkMetaData::offset_index_offset() const { return impl_->offset_index_offset(); }
+
+int64_t ColumnChunkMetaData::offset_index_length() const { return impl_->offset_index_length(); }
+
+int64_t ColumnChunkMetaData::column_index_length() const { return impl_->column_index_length(); }
+
+int64_t ColumnChunkMetaData::bloom_filter_offset() const { return impl_->bloom_filter_offset(); }
+
 // row-group metadata
 class RowGroupMetaData::RowGroupMetaDataImpl {
  public:
@@ -440,6 +465,7 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
   inline int64_t total_compressed_size() const { return row_group_->total_compressed_size; }
+  inline std::vector<format::SortingColumn> sorting_columns() { return row_group_->sorting_columns; }
 
   inline const SchemaDescriptor* schema() const { return schema_; }
@@ -487,6 +513,8 @@ int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
 
 int64_t RowGroupMetaData::file_offset() const { return impl_->file_offset(); }
 
+std::vector<format::SortingColumn> RowGroupMetaData::sorting_columns() const { return impl_->sorting_columns(); }
+
 const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
 
 std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
@@ -1127,6 +1155,18 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
     }
   }
 
+  void WriteIndex(int64_t& file_pos_, int64_t& column_index_offset, int64_t& offset_index_offset,
+                  uint32_t& ci_len, uint32_t& oi_len) {
+    column_chunk_->__set_column_index_offset(file_pos_ + column_index_offset);
+    column_chunk_->__set_column_index_length(ci_len);
+    column_chunk_->__set_offset_index_offset(file_pos_ + offset_index_offset);
+    column_chunk_->__set_offset_index_length(oi_len);
+    file_pos_ += offset_index_offset + oi_len;
+  }
+
+  void WriteBloomFilterOffset(int64_t& bloom_filter_offset) {
+    column_chunk_->meta_data.__set_bloom_filter_offset(bloom_filter_offset);
+  }
+
   void WriteTo(::arrow::io::OutputStream* sink) {
     ThriftSerializer serializer;
     serializer.Serialize(column_chunk_, sink);
@@ -1203,6 +1243,14 @@ void ColumnChunkMetaDataBuilder::WriteTo(::arrow::io::OutputStream* sink) {
   impl_->WriteTo(sink);
 }
 
+void ColumnChunkMetaDataBuilder::WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset,
+                                            uint32_t& ci_len, uint32_t& oi_len) {
+  impl_->WriteIndex(file_pos_, ci_offset, oi_offset, ci_len, oi_len);
+}
+
+void ColumnChunkMetaDataBuilder::WriteBloomFilterOffset(int64_t& file_pos_) {
+  impl_->WriteBloomFilterOffset(file_pos_);
+}
+
 const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const { return impl_->descr(); }
@@ -1239,6 +1287,7 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
     return column_builder_ptr;
   }
 
+  int current_column() { return next_column_ - 1; }
 
   void Finish(int64_t total_bytes_written, int16_t row_group_ordinal) {
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index 0186342d7f6..147714ed7c8 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -25,10 +25,16 @@
 #include <vector>
 
 #include "arrow/util/key_value_metadata.h"
 #include "parquet/platform.h"
 #include "parquet/properties.h"
 #include "parquet/schema.h"
 #include "parquet/types.h"
+#include "parquet/thrift.h"
+
+#include "arrow/util/string_view.h"
+
+using arrow::util::string_view;
 
 namespace parquet {
@@ -167,7 +173,15 @@ class PARQUET_EXPORT ColumnChunkMetaData {
   int64_t index_page_offset() const;
   int64_t total_compressed_size() const;
   int64_t total_uncompressed_size() const;
   std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;
+  int64_t column_index_offset() const;
+  int64_t offset_index_offset() const;
+  int64_t column_index_length() const;
+  int64_t offset_index_length() const;
+  int64_t bloom_filter_offset() const;
 
  private:
   explicit ColumnChunkMetaData(
@@ -180,6 +194,39 @@ class PARQUET_EXPORT ColumnChunkMetaData {
 };
 
 /// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
+enum BoundaryOrder {
+  UNORDERED = 0,
+  ASCENDING = 1,
+  DESCENDING = 2
+};
+
+class PARQUET_EXPORT PageLocation {
+ public:
+  int64_t offset;
+  int32_t compressed_page_size;
+  int64_t first_row_index;
+};
+
+class PARQUET_EXPORT ColumnIndex : format::ColumnIndex {
+ public:
+  static std::unique_ptr<ColumnIndex> Make(std::vector<bool> null_pages,
+                                           std::vector<std::string> min_values,
+                                           std::vector<std::string> max_values,
+                                           BoundaryOrder boundary_order,
+                                           std::vector<int64_t> null_counts);
+  uint32_t read(apache::thrift::protocol::TProtocol* tp) { return parquet::format::ColumnIndex::read(tp); }
+};
+
+class PARQUET_EXPORT OffsetIndex : format::OffsetIndex {
+ public:
+  static std::unique_ptr<OffsetIndex> Make(std::vector<PageLocation> page_locations);
+  uint32_t read(apache::thrift::protocol::TProtocol* tp) { return parquet::format::OffsetIndex::read(tp); }
+};
+
 class PARQUET_EXPORT RowGroupMetaData {
  public:
   /// \brief Create a RowGroupMetaData from a serialized thrift message.
@@ -211,16 +258,37 @@ class PARQUET_EXPORT RowGroupMetaData {
   /// \brief Total byte size of all the uncompressed column data in this row group.
   int64_t total_byte_size() const;
   /// \brief Byte offset from beginning of file to first page (data or
   /// dictionary) in this row group
   ///
   /// The file_offset field that this method exposes is optional. This method
   /// will return 0 if that field is not set to a meaningful value.
   int64_t file_offset() const;
   // Return const-pointer to make it clear that this object is not to be copied
   const SchemaDescriptor* schema() const;
   // Indicate if all of the RowGroup's ColumnChunks can be decompressed.
   bool can_decompress() const;
+  std::vector<format::SortingColumn> sorting_columns() const;
 
  private:
   explicit RowGroupMetaData(
@@ -403,6 +471,10 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
       const std::map<Encoding::type, int32_t>& data_encoding_stats_,
       const std::shared_ptr<Encryptor>& encryptor = NULLPTR);
 
+  void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, uint32_t& ci_len, uint32_t& oi_len);
+
+  void WriteBloomFilterOffset(int64_t& file_pos);
+
   // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
   const void* contents() const;
diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift
index de875f7a559..daa07cfe310 100644
--- a/cpp/src/parquet/parquet.thrift
+++ b/cpp/src/parquet/parquet.thrift
@@ -915,6 +915,9 @@ struct OffsetIndex {
    * that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
    */
   1: required list<PageLocation> page_locations
+
+  /** Page-level bloom filter offsets; optional so that existing readers stay compatible. **/
+  2: optional list<i64> page_bloom_filter_offsets
 }
 
 /**
@@ -953,6 +956,7 @@ struct ColumnIndex {
 
   /** A list containing the number of null values for each page **/
   5: optional list<i64> null_counts
+
 }
 
 struct AesGcmV1 {