diff --git a/cpp/examples/parquet/CMakeLists.txt b/cpp/examples/parquet/CMakeLists.txt
index cc8f5a98150..7cbbf8efa0a 100644
--- a/cpp/examples/parquet/CMakeLists.txt
+++ b/cpp/examples/parquet/CMakeLists.txt
@@ -21,6 +21,15 @@
 add_executable(parquet-arrow-example parquet-arrow/reader-writer.cc)
 add_executable(parquet-stream-api-example parquet-stream-api/stream-reader-writer.cc)
 target_include_directories(parquet-low-level-example PRIVATE low-level-api/)
 target_include_directories(parquet-low-level-example2 PRIVATE low-level-api/)
+add_executable(parquet-with-index-page-skipping low-level-api/reader-writer-with-index.cc)
+add_executable(parquet-reader-with-pageindex low-level-api/reader-with-index.cc)
+add_executable(parquet-writer-with-pageindex low-level-api/writer-with-index.cc)
+target_include_directories(parquet-with-index-page-skipping PRIVATE low-level-api/)
+target_include_directories(parquet-reader-with-pageindex PRIVATE low-level-api/)
+target_include_directories(parquet-writer-with-pageindex PRIVATE low-level-api/)
+target_link_libraries(parquet-low-level-example parquet_static)
+target_link_libraries(parquet-low-level-example2 parquet_static)
+target_link_libraries(parquet-with-index-page-skipping parquet_static)
 # The variables in these files are for illustration purposes
 set(PARQUET_EXAMPLES_WARNING_SUPPRESSIONS
@@ -47,6 +56,8 @@ if(UNIX)
                  PROPERTY COMPILE_FLAGS "-Wno-unused-variable")
   endforeach()
 endif()
+target_link_libraries(parquet-reader-with-pageindex parquet_static)
+target_link_libraries(parquet-writer-with-pageindex parquet_static)
 
 # Prefer shared linkage but use static if shared build is deactivated
 if (ARROW_BUILD_SHARED)
@@ -76,3 +86,7 @@ if (PARQUET_REQUIRE_ENCRYPTION)
                               parquet-encryption-example
                               parquet-encryption-example-all-crypto-options)
 endif()
+                              parquet-with-index-page-skipping
+                              parquet-writer-with-pageindex
+                              parquet-reader-with-pageindex
+                              parquet-arrow-example)
diff --git a/cpp/examples/parquet/low-level-api/page-index-reader-test.sh b/cpp/examples/parquet/low-level-api/page-index-reader-test.sh
new file mode 100755
index 00000000000..ddab6d0b5d8
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/page-index-reader-test.sh
@@ -0,0 +1,15 @@
+## member queries
+echo "Launching member queries..."
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_m_sorted.parquet 1000000 &
+
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_m_unsorted.parquet 1000000 &
+
+## non-member queries
+echo "Launching non-member queries..."
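+# Editorial note: the commented-out perf/iostat lines at the end of this script
+# refer to $pid, which is never set. A hypothetical capture of the PID of the
+# last backgrounded query (not in the original script) would be: pid=$!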
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_n_sorted.parquet 10000000 &
+
+$ARROW_HOME/build/debug/parquet-reader-with-pageindex ~/parquet_data/parquet_cpp_example_10000000_n_unsorted.parquet 10000000 &
+
+#perf record -ag -e faults -p $pid
+
+#iostat -k 1 -p sda > ~/parquet_data/debug_read_writes
\ No newline at end of file
diff --git a/cpp/examples/parquet/low-level-api/page-index-writer-test.sh b/cpp/examples/parquet/low-level-api/page-index-writer-test.sh
new file mode 100644
index 00000000000..c37b68e3aa7
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/page-index-writer-test.sh
@@ -0,0 +1,3 @@
+cd ~/parquet_data/
+
+$ARROW_HOME/build/debug/parquet-writer-with-pageindex $1 $2
\ No newline at end of file
diff --git a/cpp/examples/parquet/low-level-api/reader-with-index.cc b/cpp/examples/parquet/low-level-api/reader-with-index.cc
new file mode 100644
index 00000000000..1025cba87ce
--- /dev/null
+++ b/cpp/examples/parquet/low-level-api/reader-with-index.cc
@@ -0,0 +1,1216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// System headers reconstructed from usage in this file (the angle-bracket
+// include targets were lost in the patch text):
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "parquet/api/reader.h"
+#include "parquet/column_reader.h"
+#include "parquet/column_scanner.h"
+#include "parquet/deprecated_io.h"
+#include "parquet/exception.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+#include "parquet/parquet_types.h"
+#include "parquet/file_reader.h"
+#include <unistd.h>
+
+using parquet::ConvertedType;
+using parquet::Repetition;
+using parquet::Type;
+using parquet::schema::GroupNode;
+using parquet::schema::PrimitiveNode;
+
+/*
+ * This example illustrates PARQUET-1404, page-level skipping when writing and
+ * reading Parquet files in C++, and serves as a reference for the reader and
+ * writer API enhanced with the Column Index and Offset Index.
+ * The file contains all the physical data types supported by Parquet.
+ * This example uses the RowGroupWriter API, which supports writing RowGroups
+ * based on a certain size.
+ **/
+
+/* Parquet is a structured columnar file format
+ * Parquet File = "Parquet data" + "Parquet Metadata"
+ * "Parquet data" is simply a vector of RowGroups.
+ * Each RowGroup is a batch of rows in a columnar layout
+ * "Parquet Metadata" contains the "file schema" and attributes of the RowGroups
+ * and their Columns
+ * "file schema" is a tree where each node is either a primitive type (leaf
+ * nodes) or a complex (nested) type (internal nodes)
+ * For specific details, please refer to the format here:
+ * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+ **/
+
+/*********************************************************************************
+                   PARQUET READER WITH PAGE SKIPPING EXAMPLE
+**********************************************************************************/
+
+constexpr int NUM_ROWS = 20;  // 2500000;
+constexpr int64_t ROW_GROUP_SIZE =
+    (sizeof(uint32_t) + sizeof(int32_t) + sizeof(int64_t) + sizeof(float) +
+     sizeof(double) + sizeof(parquet::ByteArray) + sizeof(parquet::FixedLenByteArray)) *
+    NUM_ROWS;  // 16 * 1024 * 1024; // 16 MB
+// char PARQUET_FILENAME[] = "";
+// const char PARQUET_FILENAME[] = "/home/abalajiee/parquet_data/testing_write.parquet";
+
+int parseLine(char* line) {
+  // Extract the integer from a /proc line. This assumes a digit will be found.
+  // /proc/self/status lines end in " kB"; /proc/self/io lines end right after
+  // the digits, so only strip the trailing unit when one is present.
+  int i = strlen(line);
+  const char* p = line;
+  while (*p < '0' || *p > '9') p++;
+  if (i >= 3 && line[i - 2] == 'B') line[i - 3] = '\0';  // strip " kB"
+  i = atoi(p);
+  return i;
+}
+
+int getMemValue() {  // Note: this value is in kB!
+  FILE* file = fopen("/proc/self/status", "r");
+  int result = -1;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "VmRSS:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getReadBytesValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "read_bytes:", 11) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getReadBytesCacheValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "rchar:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+int getWriteBytesValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+  int trunc = 0;
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "write_bytes:", 12) == 0) {
+      result = parseLine(line);
+      continue;  // keep scanning: cancelled_write_bytes follows write_bytes
+    }
+    if (strncmp(line, "cancelled_write_bytes:", 22) == 0) {
+      trunc = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result - trunc;
+}
+
+int getWriteBytesCacheValue() {  // Note: /proc/self/io values are in bytes
+  FILE* file = fopen("/proc/self/io", "r");
+  int result = 0;
+  char line[128];
+  while (fgets(line, 128, file) != NULL) {
+    if (strncmp(line, "wchar:", 6) == 0) {
+      result = parseLine(line);
+      break;
+    }
+  }
+  fclose(file);
+  return result;
+}
+
+/* Sample /proc/self/io contents:
+rchar: 91439151
+wchar: 986032
+syscr: 54376
+syscw: 45314
+read_bytes: 17989632
+write_bytes: 626688
+cancelled_write_bytes: 233472
+*/
+
+// Holds the typed predicate value alongside the reader created for the column.
+struct return_multiple {
+  std::shared_ptr<parquet::ColumnReader> column_reader;
+  bool b;           // BOOLEAN predicate
+  int32_t p;        // INT32 predicate
+  int64_t r;        // INT64 predicate
+  uint32_t e;       // INT96 predicate
+  float d;          // FLOAT predicate
+  double i;         // DOUBLE predicate (was float; widened so no precision is lost)
+  char *c, *a, *t;  // BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY predicates
+};
+
+typedef struct time_to_run {
+  float wo_index = 0.0;  // without index
+  float wo_total_pages_scanned = 0.0;
+  float wo_totaltime = 0.0;
+  float wo_mem_used = 0.0;
+  float wo_read_bytes = 0.0;
+  float wo_write_bytes = 0.0;
+  float w_totaltime = 0.0;  // with index, without binary search, without blf
+  float w_index = 0.0;
+  float w_total_pages_scanned = 0.0;
+  float w_mem_used = 0.0;
+  float w_read_bytes = 0.0;
+  float w_write_bytes = 0.0;
+  float b_totaltime = 0.0;  // with binary search, without blf
+  float b_index = 0.0;
+  float b_total_pages_scanned = 0.0;
+  float b_mem_used = 0.0;
+  float b_read_bytes = 0.0;
+  float b_write_bytes = 0.0;
+  float w_blf_totaltime = 0.0;  // with blf, without page blf
+  float w_blf_index = 0.0;
+  float w_blf_total_pages_scanned = 0.0;
+  float w_blf_mem_used = 0.0;
+  float w_blf_read_bytes = 0.0;
+  float w_blf_write_bytes = 0.0;
+  float w_pageblf_totaltime = 0.0;  // with blf, with page blf
+  float w_pageblf_index = 0.0;
+  float w_pageblf_total_pages_scanned = 0.0;
+  float w_pageblf_mem_used = 0.0;
+  float w_pageblf_read_bytes = 0.0;
+  float w_pageblf_write_bytes = 0.0;
+  float blf_load_time = 0.0;
+  float index_load_time = 0.0;
+} trun;
+
+int parquet_writer(int argc, char** argv);
+
+void returnReaderwithType(std::shared_ptr<parquet::ColumnReader> cr,
+                          parquet::ColumnReader*& cr1);
+
+return_multiple getPredicate(std::shared_ptr<parquet::ColumnReader> cr,
+                             std::shared_ptr<parquet::RowGroupReader> rg, char* predicate,
+                             int& col_id, int64_t& page_index, int& PREDICATE_COL,
+                             int64_t& row_index, bool with_index, bool binary_search,
+                             int64_t& count_pages_scanned, int64_t& total_num_pages,
+                             int64_t& last_first_row, bool with_bloom_filter,
+                             bool with_page_bf, std::vector<int64_t>& unsorted_min_index,
+                             std::vector<int64_t>& unsorted_row_index);
+
+bool printVal(std::ofstream& runfile, std::shared_ptr<parquet::ColumnReader> column_reader,
+              parquet::ColumnReader* int64_reader, int ind, return_multiple vals,
+              int64_t& row_counter, bool checkpredicate, int equal_to);
+bool printRange(std::shared_ptr<parquet::ColumnReader> column_reader,
+                parquet::ColumnReader* int64_reader, int ind, return_multiple vals_min,
+                return_multiple vals_max, int64_t& row_counter);
+
+trun run_for_one_predicate(std::ofstream& runfile, int num_columns,
+                           std::shared_ptr<parquet::RowGroupReader>& row_group_reader,
+                           std::unique_ptr<parquet::ParquetFileReader>& parquet_reader,
+                           int col_id, char** argv, int predicate_index, int equal_to,
+                           bool binary_search, bool with_bloom_filter, bool with_page_bf);
+
+int64_t first_pass_for_predicate_only(std::ofstream& runfile,
+                                      std::shared_ptr<parquet::RowGroupReader> rg,
+                                      int predicate_column_number, int num_columns,
+                                      char* predicate, bool with_index, int equal_to,
+                                      bool binary_search, bool with_bloom_filter,
+                                      bool with_page_bf);
+
+int parquet_reader(int argc, char** argv);
+/**************Declaration END*********************************/
+
+int main(int argc, char** argv) {
+  parquet_reader(argc, argv);
+
+  std::cout << "Parquet Reading Complete" << std::endl;
+
+  return 0;
+}
+
+void getnumrows(char* num, int64_t& num_rows) {
+  int charlen = strlen(num);
+  int charin = 0;
+  while (num[charin] != '\0') {
+    num_rows +=
+        (num[charin] - '0') * ((int64_t)pow(10, charlen - charin - 1));
+    charin++;
+  }
+}
+
+int intlog(int num_rows) { return (int)log10(num_rows); }
+
+char* convertToCharptr(int64_t number, char*& predicate, int charlen) {
+  int i = 0;
+  for (; i < charlen; i++) {
+    predicate[charlen - i - 1] = number % 10 + '0';
+    number = number / 10;
+  }
+  predicate[i] = '\0';
+  return predicate;
+}
+
+int parquet_reader(int argc, char** argv) {
+  std::string PARQUET_FILENAME = argv[1];
+  try {
+    // Create a ParquetReader instance
+    std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
+        parquet::ParquetFileReader::OpenFile(PARQUET_FILENAME, false);
+
+    // Get the File MetaData
+    std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
+
+    int num_row_groups = file_metadata->num_row_groups();
+
+    // Get the number of Columns
+    int num_columns = file_metadata->num_columns();
+    // assert(num_columns == NUM_COLS);
+    std::ofstream runfile;
+    runfile.open(PARQUET_FILENAME + "-run-results.txt");  // + "-" + std::to_string(col_id);
+    if (argc == 3) {
+      // Point queries and range queries
+
+      int64_t num_rows = 0;
+      int num_queries = 1000;
+      int num_runs = 1;
+
+      // char* col_num = argv[3];
+      // std::stringstream ss(col_num);
+      // int col_id;
+      // ss >> col_id;
+
+      getnumrows(argv[2], num_rows);
+
+      // std::vector instead of a variable-length array (VLAs are non-standard C++)
+      std::vector<trun> times_by_type(num_columns);
+
+      runfile << time(NULL) << std::endl;
+      runfile << "############################## -- RUNNING POINT QUERIES -- ########################################" << std::endl;
+      for (int col_id = 0; col_id < num_columns; col_id++) {
+        times_by_type[col_id].w_index = 0.0;
+        times_by_type[col_id].wo_index = 0.0;
+        times_by_type[col_id].wo_totaltime = 0.0;
+        times_by_type[col_id].w_totaltime = 0.0;
+        times_by_type[col_id].b_totaltime = 0.0;
+        times_by_type[col_id].w_blf_totaltime = 0.0;
+        times_by_type[col_id].w_pageblf_totaltime = 0.0;
+        times_by_type[col_id].b_index = 0.0;
+        times_by_type[col_id].wo_total_pages_scanned = 0.0;
+        times_by_type[col_id].w_total_pages_scanned = 0.0;
+        times_by_type[col_id].b_total_pages_scanned = 0.0;
+        times_by_type[col_id].blf_load_time = 0.0;
+        times_by_type[col_id].index_load_time = 0.0;
+      }
+
+      // over each rowgroup
+      for (int r = 0; r < num_row_groups; r++) {
+        // for each column, run num_queries queries, num_runs times each
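+        // -----------------------------------------------------------------------
+        // Editorial sketch (not part of the original patch): getnumrows() and
+        // convertToCharptr() above hand-roll a fixed-width decimal round-trip.
+        // A self-contained standard-library equivalent, for reference:
+        //
+        //   #include <cstdint>
+        //   #include <cstdio>
+        //   #include <string>
+        //
+        //   int main() {
+        //     int64_t number = 42;
+        //     int width = 7;  // intlog(10000000) == 7
+        //     char buf[32];
+        //     std::snprintf(buf, sizeof(buf), "%0*lld", width,
+        //                   static_cast<long long>(number));  // -> "0000042"
+        //     int64_t parsed = std::stoll(buf);                // -> 42 again
+        //     return parsed == number ? 0 : 1;
+        //   }
+        // -----------------------------------------------------------------------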
+ for ( int col_id =0; col_id < num_columns; col_id++){ + //re-initialize column index, offset index and bloomfilters for each column + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + // for that column so many runs + for(int i=0; i < num_runs; i++){ + int predicateindex = 0; + char** predicates = (char**)malloc(sizeof(char*)*num_queries); + while ( predicateindex < num_queries ){ + // one query of the queries of the run + // sleep(1); + srand(time(NULL)); + char* predicate_val = (char*)malloc(intlog(num_rows)+1); + convertToCharptr(rand()%num_rows,predicate_val,intlog(num_rows)); + predicates[predicateindex] = predicate_val; + + runfile << " run number " << i << "-- Query number " << predicateindex << "-- col_num " << col_id << " predicate: " << predicates[predicateindex] << std::endl; + trun avgtime = run_for_one_predicate(runfile, num_columns,row_group_reader,parquet_reader,col_id,predicates,predicateindex,0,true,true,true); + + times_by_type[col_id].wo_totaltime += avgtime.wo_totaltime; + times_by_type[col_id].w_totaltime += avgtime.w_totaltime; + times_by_type[col_id].b_totaltime += avgtime.b_totaltime; + times_by_type[col_id].w_blf_totaltime += avgtime.w_blf_totaltime; + // times_by_type[col_id].w_pageblf_totaltime = avgtime.w_pageblf_totaltime; + + times_by_type[col_id].wo_total_pages_scanned += avgtime.wo_total_pages_scanned; + times_by_type[col_id].w_total_pages_scanned += avgtime.w_total_pages_scanned; + times_by_type[col_id].b_total_pages_scanned += avgtime.b_total_pages_scanned; + times_by_type[col_id].w_blf_total_pages_scanned += avgtime.w_blf_total_pages_scanned; + // times_by_type[col_id].w_pageblf_total_pages_scanned += avgtime.w_pageblf_total_pages_scanned; + + times_by_type[col_id].wo_mem_used += avgtime.wo_mem_used; + times_by_type[col_id].w_mem_used += avgtime.w_mem_used; + times_by_type[col_id].b_mem_used += avgtime.b_mem_used; + times_by_type[col_id].w_blf_mem_used += avgtime.w_blf_mem_used; + // times_by_type[col_id].w_pageblf_mem_used += avgtime.w_pageblf_mem_used; + + times_by_type[col_id].wo_read_bytes += avgtime.wo_read_bytes; + times_by_type[col_id].w_read_bytes += avgtime.w_read_bytes; + times_by_type[col_id].b_read_bytes += avgtime.b_read_bytes; + times_by_type[col_id].w_blf_read_bytes += avgtime.w_blf_read_bytes; + // times_by_type[col_id].w_pageblf_read_bytes += avgtime.w_pageblf_read_bytes; + + times_by_type[col_id].wo_write_bytes += avgtime.wo_write_bytes; + times_by_type[col_id].w_write_bytes += avgtime.w_write_bytes; + times_by_type[col_id].b_write_bytes += avgtime.b_write_bytes; + times_by_type[col_id].w_blf_write_bytes += avgtime.w_blf_write_bytes; + // times_by_type[col_id].w_pageblf_write_bytes += avgtime.w_pageblf_write_bytes; + times_by_type[col_id].blf_load_time = row_group_reader->GetBLFLoadTime(); + times_by_type[col_id].index_load_time = row_group_reader->GetIndexLoadTime(); + + predicateindex++; + } + } + + runfile << "############################### -- POINT QUERY RUN TIME RESULTS FINAL --" << col_id << "-- ################################" << std::endl; + + runfile<< "|----------------------------col_num " << col_id << "----------------------------|" << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index " + << (times_by_type[col_id].wo_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].wo_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].wo_mem_used << 
std::endl + << " avg bytes read " << times_by_type[col_id].wo_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].wo_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index " + << (times_by_type[col_id].w_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with bloomfilter " + << (times_by_type[col_id].b_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].b_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].b_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].b_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].b_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index with bloomfilter " + << (times_by_type[col_id].w_blf_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_blf_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_blf_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_blf_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_blf_write_bytes + << std::endl; + + // runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with binary with bloomfilter " + // << (times_by_type[col_id].w_pageblf_totaltime/(num_runs*num_queries)) << std::endl + // << " avg num of datapage indices scanned " << (times_by_type[col_id].w_pageblf_total_pages_scanned/(num_runs*num_queries)) << std::endl + // << " avg memory used in kB " << times_by_type[col_id].w_pageblf_mem_used << std::endl + // << " avg bytes read " << times_by_type[col_id].w_pageblf_read_bytes << std::endl + // << " avg bytes written " << times_by_type[col_id].w_pageblf_write_bytes + // << std::endl; + + runfile<< "|----------------------------------------------------------------------------------|" << std::endl; + + } + } + + runfile << "############################### -- POINT QUERY RUN TIME RESULTS FINAL ################################" << std::endl; + for ( int col_id =0; col_id < num_columns; col_id++){ + runfile<< "|----------------------------col_num " << col_id << "----------------------------|" << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index " + << (times_by_type[col_id].wo_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].wo_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].wo_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].wo_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].wo_write_bytes + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index " + << (times_by_type[col_id].w_totaltime/(num_runs*num_queries)) << std::endl + << " avg 
num of datapage indices scanned " << (times_by_type[col_id].w_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_write_bytes + << " index load time " << times_by_type[col_id].index_load_time + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with bloomfilter " + << (times_by_type[col_id].b_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].b_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].b_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].b_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].b_write_bytes + << " index load time " << times_by_type[col_id].index_load_time + << " blf load time " << times_by_type[col_id].blf_load_time + << std::endl; + + runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w/o index with bloomfilter " + << (times_by_type[col_id].w_blf_totaltime/(num_runs*num_queries)) << std::endl + << " avg num of datapage indices scanned " << (times_by_type[col_id].w_blf_total_pages_scanned/(num_runs*num_queries)) << std::endl + << " avg memory used in kB " << times_by_type[col_id].w_blf_mem_used << std::endl + << " avg bytes read " << times_by_type[col_id].w_blf_read_bytes << std::endl + << " avg bytes written " << times_by_type[col_id].w_blf_write_bytes + << " blf load time " << times_by_type[col_id].blf_load_time + << std::endl; + + // runfile << std::setprecision(3) <<"POINT QUERY: minimum average time w index with binary with bloomfilter " + // << (times_by_type[col_id].w_pageblf_totaltime/(num_runs*num_queries)) << std::endl + // << " avg num of datapage indices scanned " << (times_by_type[col_id].w_pageblf_total_pages_scanned/(num_runs*num_queries)) << std::endl + // << " avg memory used in kB " << times_by_type[col_id].w_pageblf_mem_used << std::endl + // << " avg bytes read " << times_by_type[col_id].w_pageblf_read_bytes << std::endl + // << " avg bytes written " << times_by_type[col_id].w_pageblf_write_bytes + // << std::endl; + + runfile<< "|----------------------------------------------------------------------------------|" << std::endl; + + } + runfile << "#######################################################################################################" << std::endl; + } + + if ( argc == 4 ) { + char *col_num = argv[2]; + std::stringstream ss(col_num); + int colid; + ss >> colid; + for ( int r = 0; r < num_row_groups; r++) { + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,3,0,true,true,true); + } + } + + + if ( argc == 5 ){ + char *col_num = argv[2]; + std::stringstream ss(col_num); + int colid; + ss >> colid; + for ( int r = 0; r < num_row_groups; r++) { + std::shared_ptr row_group_reader = parquet_reader->RowGroup(r); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,3,1,true,true,true); + run_for_one_predicate(runfile,num_columns,row_group_reader,parquet_reader,colid,argv,4,-1,true,true,true); + } + } + runfile.close(); + return 0; + } catch (const std::exception& e) { + std::cerr << "Parquet read error: " << e.what() << std::endl; + return -1; + } + +} + +trun 
run_for_one_predicate(std::ofstream& runfile,int num_columns,std::shared_ptr& row_group_reader, std::unique_ptr& parquet_reader, int colid,char** argv,int predicate_index, + int equal_to, bool binary_search, bool with_bloom_filter, bool with_page_bf) { + + + trun avgtime; + int64_t prev_num_bytes_r = 0; + int64_t prev_num_bytes_rc = 0; + int64_t prev_num_bytes_w = 0; + int64_t prev_num_bytes_wc = 0; + int64_t curr_num_bytes_r = 0; + int64_t curr_num_bytes_rc = 0; + int64_t curr_num_bytes_w = 0; + int64_t curr_num_bytes_wc = 0; + int64_t prev_mem_used = 0; + int64_t curr_mem_used = 0; + // Iterate over all the RowGroups in the file + //for (int r = 0; r < num_row_groups; ++r) + { + + + char *predicate_val = argv[predicate_index]; + + int col_id = colid; + // Get the RowGroup Reader + + clock_t start_time,end_time; + float total_time= 0.0; + int num_runs = 1; + + float total_pages_scanned = 0.0; + + runfile << " Column ID: " << col_id << "| Column Type: " << row_group_reader->Column(col_id)->type() << std::endl; + + /********FIRST PASS WITHOUT INDEX***************/ + /*total_time = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ########################################################################## " << std::endl; + runfile << "\n time for predicate one pass without index: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + total_pages_scanned += first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,false,equal_to,!binary_search,!with_bloom_filter, !with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + avgtime.wo_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.wo_totaltime = total_time; + avgtime.wo_mem_used = curr_mem_used-prev_mem_used; + avgtime.wo_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.wo_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + + /**************FIRST PASS WITH INDEX WITHOUT BINARY WITHOUT BF PAGE BF*****************/ + + /*total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate one pass without bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,true,equal_to, !binary_search, !with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n index load time: " << row_group_reader->GetIndexLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.w_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.w_totaltime = total_time; + avgtime.w_mem_used = curr_mem_used-prev_mem_used; + avgtime.w_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.w_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + /**************FIRST PASS WITH INDEX WITH BINARY WITHOUT BF PAGE BF*****************/ + + /*total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate one pass with bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,true,equal_to, !binary_search, with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n index load time: " << row_group_reader->GetIndexLoadTime() << std::endl; + runfile << "\n blf load time: " << row_group_reader->GetBLFLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.b_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.b_totaltime = total_time; + avgtime.b_mem_used = curr_mem_used-prev_mem_used; + avgtime.b_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.b_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ------------------------------------------------------------------------ " << std::endl;*/ + /**************FIRST PASS WITH INDEX WITH BINARY WITH BF WITHOUT PAGE BF*****************/ + + total_time = 0.0; + total_pages_scanned = 0.0; + prev_mem_used = getMemValue(); + prev_num_bytes_r = getReadBytesValue(); + prev_num_bytes_w = getWriteBytesValue(); + runfile << " ------------------------------------------------------------------------ " << std::endl; + runfile << "\n time for predicate without index with bloom filter: " << std::endl; + for(int t =0 ; t< num_runs; t++){ + auto start_time = std::chrono::high_resolution_clock::now(); + first_pass_for_predicate_only(runfile, row_group_reader,col_id,num_columns,predicate_val,false,equal_to, !binary_search, with_bloom_filter,!with_page_bf); + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(start_time-end_time); + + float time_elapsed = (float) duration.count(); + + runfile << std::setprecision(3) << time_elapsed << std::endl; + curr_mem_used = getMemValue(); + curr_num_bytes_r = getReadBytesValue(); + curr_num_bytes_w = getWriteBytesValue(); + runfile << "\n memory used currently by the process in virtual memory (in kB): " << curr_mem_used << std::endl; + runfile << "\n change in memory used (in kB): " << curr_mem_used-prev_mem_used << std::endl; + runfile << "\n number of bytes read from storage layer (in B): " << curr_num_bytes_r - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written to storage (in B): " << curr_num_bytes_w - prev_num_bytes_w << std::endl; + runfile << "\n number of bytes read from cache (in B): " << curr_num_bytes_rc - prev_num_bytes_r << std::endl; + runfile << "\n number of bytes written cancelled by cache (in B): " << curr_num_bytes_wc - prev_num_bytes_wc << std::endl; + runfile << "\n blf load time: " << row_group_reader->GetBLFLoadTime() << std::endl; + total_time = (t!=0 && time_elapsed > total_time)? 
total_time:time_elapsed; + } + + avgtime.w_blf_total_pages_scanned = total_pages_scanned/num_runs; + avgtime.w_blf_totaltime = total_time; + avgtime.w_blf_mem_used = curr_mem_used-prev_mem_used; + avgtime.w_blf_read_bytes = curr_num_bytes_r - prev_num_bytes_r; + avgtime.w_blf_write_bytes = curr_num_bytes_w - prev_num_bytes_w; + runfile << " ########################################################################## " << std::endl; + + /***********FIRST PASS END **********/ + + /***********Second PASS *************/ + // TODO // + + /***********************************/ + + } + return avgtime; +} + + +int64_t first_pass_for_predicate_only(std::ofstream& runfile, std::shared_ptr row_group_reader,int col_id, int num_columns, char* predicate_val,bool with_index, + int equal_to, bool binary_search, bool with_bloom_filter, bool with_page_bf) { + + int64_t row_index = 0; + int64_t count_pages_scanned = 0, total_num_pages = 0, last_first_row = 0; + + std::vector col_row_counts(num_columns, 0); + + // assert(row_group_reader->metadata()->total_byte_size() < ROW_GROUP_SIZE); + + // int16_t definition_level; + // int16_t repetition_level; + std::shared_ptr column_reader; + + + // std::cout<< "test arg v" < unsorted_page_index; + std::vector unsorted_row_index; + + char c; + // int64_t predicate; + // sscanf(argv[2], "%" SCNd64 "%c", &predicate, &c); + + // int PREDICATE_COL; + // sscanf(argv[2], "%d" "%c", &PREDICATE_COL, &c); + // Get the Column Reader for the Int64 column + std::shared_ptr predicate_column_reader = row_group_reader->Column(col_id); + + + + // std::cout << "given predicate: " << predicate << " type of predicate: " << typeid(predicate).name() << std::endl; + + std::shared_ptr column_reader_with_index; + + parquet::ColumnReader* generic_reader; + + int PREDICATE_COL = col_id; + return_multiple vals = getPredicate(predicate_column_reader,row_group_reader,predicate_val,col_id,page_index,PREDICATE_COL,row_index,with_index,binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_page_index, unsorted_row_index); + column_reader_with_index = vals.column_reader; + + //SAMPLE row group reader call in the comment below + // row_group_reader->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,predicate_column_reader->type()); + + returnReaderwithType(column_reader_with_index,generic_reader); + + int counter = 0; + int ind = 0; + int64_t row_counter = 0; + + if (unsorted_row_index.size()==0){ + if ( row_index != -1 ) { + if(with_index){ + ind = row_index; + row_counter = 0; + generic_reader->Skip(row_index); + do{ ind++; + if((printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to))) + break; + }while((generic_reader->HasNext())); + } + else{ + while (generic_reader->HasNext()) { + ind++; + count_pages_scanned++; + if(printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to)) + break; + // int64_t expected_value = col_row_counts[col_id]; + // assert(value == expected_value); + col_row_counts[col_id]++; + } + } + // Read all the rows in the column + runfile << "| page index: " << page_index << "| number of rows loaded: " << ind << + "| total number of pages: " << total_num_pages << "| last page first row index: " << last_first_row << std::endl; + + } + else{ + runfile << "non-member query" << std::endl; + } + } + else{ + ind = 0; + int index_list_count = 0; + bool found = false; + for(int64_t row_index: unsorted_row_index) { + row_counter = 0; + 
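+        // Editorial note: the pattern below is "skip, then scan": Skip() moves the
+        // reader to the first row of a candidate page taken from the column index,
+        // and the do/while reads one value at a time until the predicate matches.
+        // Condensed shape of the loop (names from this file, not a library API):
+        //
+        //   for (int64_t row : unsorted_row_index) {      // candidate page starts
+        //     generic_reader->Skip(row);                  // seek to the page
+        //     do {
+        //       if (printVal(...)) { found = true; break; }  // predicate hit
+        //     } while (generic_reader->HasNext());
+        //     if (found) break;                           // stop at first match
+        //   }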
generic_reader->Skip(row_index); + do{ ind++; + if((printVal(runfile, column_reader_with_index,generic_reader,ind,vals,row_counter,true,equal_to))){ + found = true; + break; + } + + }while((generic_reader->HasNext())); + // Read all the rows in the column + runfile << "| page index: " << unsorted_page_index[index_list_count] << "| number of rows loaded: " << ind << + "| total number of pages: " << total_num_pages << "| last page first row index: " << last_first_row << std::endl; + index_list_count++; + if (found) break; + } + if ( ind == (int)unsorted_row_index.size()) + runfile << "non-member query" << std::endl; + } + + return count_pages_scanned; +} + +return_multiple getPredicate(std::shared_ptr cr,std::shared_ptr rg,char* predicate_val, + int& col_id,int64_t& page_index,int& PREDICATE_COL,int64_t& row_index, bool with_index, + bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index){ + const int CHAR_LEN = 10000000; + + return_multiple vals; + std::stringstream ss(predicate_val); + switch(cr->type()){ + case Type::BOOLEAN:{ + bool b; + + ss >> std::boolalpha >> b; + void * predicate = static_cast(&b); + + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.b = b; + return vals; + } + case Type::INT32:{ + int32_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.p = val; + return vals; + } + case Type::INT64:{ + int64_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.r = val; + return vals; + } + case Type::INT96:{ + uint32_t val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.e = val; + return vals; + } + case Type::FLOAT:{ + float val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.d = val; + return vals; + } + case Type::DOUBLE:{ + double val; + + ss >> val; + void * predicate = static_cast(&val); + vals.column_reader = + rg->ColumnWithIndex(col_id,predicate,page_index,PREDICATE_COL,row_index,cr->type(),with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter,with_page_bf, + unsorted_min_index, unsorted_row_index); + vals.i = val; + return vals; + } + case 
Type::BYTE_ARRAY: {
+      char* val = predicate_val;
+
+      void* predicate = static_cast<void*>(val);
+      vals.column_reader =
+          rg->ColumnWithIndex(col_id, predicate, page_index, PREDICATE_COL, row_index,
+                              cr->type(), with_index, binary_search, count_pages_scanned,
+                              total_num_pages, last_first_row, with_bloom_filter,
+                              with_page_bf, unsorted_min_index, unsorted_row_index);
+      vals.c = val;
+      return vals;
+    }
+    case Type::FIXED_LEN_BYTE_ARRAY: {
+      char* val = predicate_val;
+
+      void* predicate = static_cast<void*>(val);
+      vals.column_reader =
+          rg->ColumnWithIndex(col_id, predicate, page_index, PREDICATE_COL, row_index,
+                              cr->type(), with_index, binary_search, count_pages_scanned,
+                              total_num_pages, last_first_row, with_bloom_filter,
+                              with_page_bf, unsorted_min_index, unsorted_row_index);
+      vals.a = val;
+      return vals;
+    }
+    default: {
+      std::cout << "type not supported" << std::endl;
+      vals.b = false;  // was NULL; b is a bool
+      vals.a = nullptr;
+      vals.c = nullptr;
+      vals.t = nullptr;
+      return vals;
+    }
+  }
+}
+
+void returnReaderwithType(std::shared_ptr<parquet::ColumnReader> column_reader,
+                          parquet::ColumnReader*& int64_reader) {
+  // The cast targets below were lost in the patch text and are reconstructed
+  // from the parquet typed-reader classes.
+  switch (column_reader->type()) {
+    case Type::BOOLEAN:
+      int64_reader = static_cast<parquet::BoolReader*>(column_reader.get());
+      break;
+    case Type::INT32:
+      int64_reader = static_cast<parquet::Int32Reader*>(column_reader.get());
+      break;
+    case Type::INT64:
+      int64_reader = static_cast<parquet::Int64Reader*>(column_reader.get());
+      break;
+    case Type::INT96:
+      int64_reader = static_cast<parquet::Int96Reader*>(column_reader.get());
+      break;
+    case Type::FLOAT:
+      int64_reader = static_cast<parquet::FloatReader*>(column_reader.get());
+      break;
+    case Type::DOUBLE:
+      int64_reader = static_cast<parquet::DoubleReader*>(column_reader.get());
+      break;
+    case Type::BYTE_ARRAY:
+      int64_reader = static_cast<parquet::ByteArrayReader*>(column_reader.get());
+      break;
+    case Type::FIXED_LEN_BYTE_ARRAY:
+      int64_reader = static_cast<parquet::FixedLenByteArrayReader*>(column_reader.get());
+      break;
+    default:
+      parquet::ParquetException::NYI("type reader not implemented");
+  }
+}
+
+bool printVal(std::ofstream& runfile, std::shared_ptr<parquet::ColumnReader> column_reader,
+              parquet::ColumnReader* int64_reader, int ind, return_multiple vals,
+              int64_t& row_counter, bool checkpredicate = false, int equal_to = 0) {
+  int64_t values_read = 0;
+  switch (column_reader->type()) {
+    case Type::BOOLEAN: {
+      bool test;
+      bool predicate = vals.b;
+      int64_reader->callReadBatch(1, &test, &values_read);
+      row_counter = ind;
+
+      if (equal_to == 0 && checkpredicate && test == predicate) {
+        runfile << "with predicate row number: " << row_counter << " " << test << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && test < predicate) {
+        // keep scanning for a "<" range query
+      } else if (equal_to == 1 && checkpredicate && test > predicate) {
+        // keep scanning for a ">" range query
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::INT32: {
+      int32_t val;
+      int32_t predicate = vals.p;
+      int64_reader->callReadBatch(1, &val, &values_read);
+      row_counter = ind;
+
+      if (equal_to == 0 && checkpredicate && val == predicate) {
+        runfile << "with predicate row number: " << row_counter << " " << val << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && val < predicate) {
+      } else if (equal_to == 1 && checkpredicate && val > predicate) {
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::INT64: {
+      int64_t value;
+      int64_t predicate = vals.r;
+      // Read
one value at a time. The number of rows read is returned. values_read + // contains the number of non-null rows + int64_reader->callReadBatch(1,&value,&values_read); + + // Ensure only one value is read + //assert(rows_read == 1); + // There are no NULL values in the rows written + // assert(values_read == 1); + // Verify the value written + if ( equal_to == 0 && checkpredicate && value == predicate) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << value << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && value < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && value > predicate ) { + + } + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << value << "\n"; + return false; + } + break; + } + case Type::INT96: + { + uint32_t val; + uint32_t predicate = vals.e; + int64_reader->callReadBatch(1,&val,&values_read); + row_counter = ind; + + if ( equal_to == 0 && checkpredicate && val == predicate) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::FLOAT: + { + float val; + float predicate = vals.d; + float error_factor = 9*pow(10,15); + int64_reader->callReadBatch(1,&val,&values_read); + if ( checkpredicate && fabs(val-predicate)<=std::numeric_limits::epsilon()*error_factor) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::DOUBLE: + { + double val; + double predicate = vals.i; + int64_reader->callReadBatch(1,&val,&values_read); + double error_factor = 9*pow(10,15); + + if ( equal_to == 0 && checkpredicate && fabs(val-predicate)<=std::numeric_limits::epsilon()*error_factor) { + row_counter = ind; + runfile << "with predicate row number: " << row_counter << " " << val << "\n"; + //std::cout << "predicate: " << *((int64_t*)predicate) << std::endl; + return true; + } + else if ( equal_to == -1 && checkpredicate && val < predicate ){ + + } + else if ( equal_to == 1 && checkpredicate && val > predicate ) { + + } + + else{ + row_counter = ind; + //std::cout << "row number: " << row_counter << " " << val << "\n"; + return false; + } + break; + } + case Type::BYTE_ARRAY: + { + parquet::ByteArray str; + char* predicate = vals.c; + uint32_t FIXED_LENGTH = 124; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(predicate));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(predicate)); i < FIXED_LENGTH;i++) dest[i] = predicate[i-(FIXED_LENGTH-strlen(predicate))]; + dest[FIXED_LENGTH] = '\0'; + std::string pstring(dest); + int64_reader->callReadBatch(1,&str,&values_read); + std::string result_value = parquet::ByteArrayToString(str); + 
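+      // Editorial note: `char dest[FIXED_LENGTH]` above is terminated with
+      // `dest[FIXED_LENGTH] = '\0'`, which writes one byte past the end of the
+      // array. A bounds-safe way to build the zero-padded comparison string,
+      // using only the standard library, would be:
+      //
+      //   std::string pstring =
+      //       std::string(FIXED_LENGTH - strlen(predicate), '0') + predicate;
+      //
+      // std::string manages its own storage, so no manual terminator is needed.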
// std::string result(result_value.substr(result_value.length()-strlen(predicate), strlen(predicate)));
+      row_counter = ind;
+      if (equal_to == 0 && checkpredicate && result_value.compare(pstring) == 0) {
+        runfile << "with predicate row number: " << row_counter << " " << result_value << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && result_value.compare(pstring) < 0) {
+        // keep scanning for a "<" range query
+      } else if (equal_to == 1 && checkpredicate && result_value.compare(pstring) > 0) {
+        // keep scanning for a ">" range query
+      } else {
+        return false;
+      }
+      break;
+    }
+    case Type::FIXED_LEN_BYTE_ARRAY: {
+      parquet::FLBA str;
+      char* predicate = vals.a;
+      int64_reader->callReadBatch(1, &str, &values_read);
+      // Note: sizeof(str) is the size of the FLBA struct, not the declared
+      // fixed length of the column; the declared length should come from the
+      // column descriptor.
+      std::string result_value = parquet::FixedLenByteArrayToString(str, sizeof(str));
+      std::string result(result_value.substr(result_value.length() - strlen(predicate), strlen(predicate)));
+      row_counter = ind;
+      if (equal_to == 0 && checkpredicate && strcmp(result.c_str(), predicate) == 0) {
+        runfile << "with predicate row number: " << row_counter << " " << result << "\n";
+        return true;
+      } else if (equal_to == -1 && checkpredicate && strcmp(result.c_str(), predicate) < 0) {
+      } else if (equal_to == 1 && checkpredicate && strcmp(result.c_str(), predicate) > 0) {
+      } else {
+        return false;
+      }
+      break;
+    }
+    default: {
+      parquet::ParquetException::NYI("type reader not implemented");
+      return false;
+    }
+  }
+  return false;
+}
diff --git a/cpp/examples/parquet/low-level-api/reader-writer.cc b/cpp/examples/parquet/low-level-api/reader-writer.cc
index 82232ddbe7b..152738f0ed5 100644
--- a/cpp/examples/parquet/low-level-api/reader-writer.cc
+++ b/cpp/examples/parquet/low-level-api/reader-writer.cc
@@ -42,8 +42,8 @@
  * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
  **/
 
-constexpr int NUM_ROWS_PER_ROW_GROUP = 500;
-const char PARQUET_FILENAME[] = "parquet_cpp_example.parquet";
+constexpr int NUM_ROWS_PER_ROW_GROUP = 15000000;
+const char PARQUET_FILENAME[] = "parquet_cpp_example_15M.parquet";
 
 int main(int argc, char** argv) {
   /**********************************************************************************
@@ -57,12 +57,12 @@ int main(int argc, char** argv) {
   using FileClass = ::arrow::io::FileOutputStream;
   std::shared_ptr<FileClass> out_file;
   PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME));
 
   // Setup the parquet schema
   std::shared_ptr<GroupNode> schema = SetupSchema();
 
   // Add writer properties
   parquet::WriterProperties::Builder builder;
-  builder.compression(parquet::Compression::SNAPPY);
+  builder.compression(parquet::Compression::UNCOMPRESSED);
   std::shared_ptr<parquet::WriterProperties> props = builder.build();
 
@@ -73,6 +73,13 @@ int main(int argc, char** argv) {
   // Append a RowGroup with a specific number of rows.
parquet::RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + // // Write the Bool column + // parquet::BoolWriter* bool_writer = + // static_cast(rg_writer->NextColumn()); + // for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + // bool value = ((i % 2) == 0) ? true : false; + // bool_writer->WriteBatch(1, nullptr, nullptr, &value); + // } // Write the Bool column parquet::BoolWriter* bool_writer = static_cast(rg_writer->NextColumn()); diff --git a/cpp/examples/parquet/low-level-api/reader_writer_forindex.h b/cpp/examples/parquet/low-level-api/reader_writer_forindex.h new file mode 100644 index 00000000000..7e149d15753 --- /dev/null +++ b/cpp/examples/parquet/low-level-api/reader_writer_forindex.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include + +using parquet::ConvertedType; +using parquet::Repetition; +using parquet::Type; +using parquet::schema::GroupNode; +using parquet::schema::PrimitiveNode; + +constexpr int FIXED_LENGTH = 10; + +static std::shared_ptr SetupSchema() { + parquet::schema::NodeVector fields; + + + // Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED, + // logical type:TIME_MILLIS + fields.push_back(PrimitiveNode::Make("int32_field1", Repetition::REQUIRED, Type::INT32,ConvertedType::NONE)); + + // Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED + fields.push_back(PrimitiveNode::Make("int64_field1", Repetition::REQUIRED, Type::INT64,ConvertedType::NONE)); + + fields.push_back(PrimitiveNode::Make("float_field1", Repetition::REQUIRED, Type::FLOAT,ConvertedType::NONE)); + + fields.push_back(PrimitiveNode::Make("double_field1", Repetition::REQUIRED, Type::DOUBLE,ConvertedType::NONE)); + + // Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL + fields.push_back(PrimitiveNode::Make("ba_field1", Repetition::OPTIONAL, Type::BYTE_ARRAY,ConvertedType::NONE)); + + // Create a GroupNode named 'schema' using the primitive nodes defined above + // This GroupNode is the root node of the schema tree + return std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); +} diff --git a/cpp/examples/parquet/low-level-api/writer-with-index.cc b/cpp/examples/parquet/low-level-api/writer-with-index.cc new file mode 100644 index 00000000000..56eb9cb6202 --- /dev/null +++ b/cpp/examples/parquet/low-level-api/writer-with-index.cc @@ -0,0 +1,753 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "parquet/column_reader.h" +#include "parquet/column_scanner.h" +#include "parquet/deprecated_io.h" +#include "parquet/exception.h" +#include "parquet/metadata.h" +#include "parquet/platform.h" +#include "parquet/properties.h" +#include "parquet/schema.h" +#include "parquet/types.h" + +/* + * This example illustrates PARQUET-1404 for page level skipping in + * writing and reading Parquet Files in C++ and serves as a + * reference to the API for reader and writer enhanced with Column Index and Offset Index + * The file contains all the physical data types supported by Parquet. + * This example uses the RowGroupWriter API that supports writing RowGroups based on a + *certain size + **/ + +/* Parquet is a structured columnar file format + * Parquet File = "Parquet data" + "Parquet Metadata" + * "Parquet data" is simply a vector of RowGroups. Each RowGroup is a batch of rows in a + * columnar layout + * "Parquet Metadata" contains the "file schema" and attributes of the RowGroups and their + * Columns + * "file schema" is a tree where each node is either a primitive type (leaf nodes) or a + * complex (nested) type (internal nodes) + * For specific details, please refer the format here: + * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md + **/ + + +/********************************************************************************* + PARQUET WRITER WITH PAGE SKIPPING EXAMPLE +**********************************************************************************/ + +void writecolswithindexbf(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp,int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. 
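+  // Editorial note: the "number of bytes" printed per column in this function is
+  // the bloom filter size amortized over the row count. For a classic bloom
+  // filter, the textbook sizing for false-positive probability p is (standard
+  // formula, not necessarily the exact rule InitBloomFilter() applies):
+  //
+  //   bits_per_value = -ln(p) / (ln 2)^2   // ~9.6 bits per value for p = 0.01
+  //
+  //   #include <cmath>
+  //   double bits_per_value(double p) {
+  //     return -std::log(p) / (std::log(2.0) * std::log(2.0));
+  //   }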
+ parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writecolswithindexbfunsorted(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp, int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + srand(time(NULL)); + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = rand()%NUM_ROWS_PER_ROW_GROUP; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + srand(time(NULL)); + // Write the Int64 column. Each row has not[repeats twice]. 
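One thing to watch in this unsorted variant: `srand(time(NULL))` is re-seeded before every column, and `time` has one-second resolution, so columns written within the same second draw the identical pseudo-random sequence. A sketch of a single-seed alternative using C++11 `<random>` (the helper name is illustrative):

#include <random>

// Seed one generator per process instead of re-seeding per column.
inline int64_t RandomRowValue(int64_t upper_bound) {
  static std::mt19937_64 gen{std::random_device{}()};
  std::uniform_int_distribution<int64_t> dist(0, upper_bound - 1);
  return dist(gen);
}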
+ parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = rand()%NUM_ROWS_PER_ROW_GROUP; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(rand()%NUM_ROWS_PER_ROW_GROUP) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = rand()%NUM_ROWS_PER_ROW_GROUP * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + srand(time(NULL)); + // Write the ByteArray column. Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes ByteArray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetwithindexbf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both 
definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. + parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolswithindexbf(NUM_ROWS/num_rg,rg_writer,fpp,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(true,true); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writeparquetwithindexbfunsorted(int NUM_ROWS, int num_rg,float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_unsorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
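Note that `NUM_ROWS / num_rg` truncates, so when `num_rg` does not evenly divide `NUM_ROWS` the file silently ends up short of `NUM_ROWS` rows. A sketch of a loop that gives the remainder to the last row group, using the same API as the code that follows:

int rows_per_rg = NUM_ROWS / num_rg;
for (int i = 0; i < num_rg; i++) {
  // The last row group absorbs whatever the integer division dropped.
  int rows_this_rg =
      (i == num_rg - 1) ? NUM_ROWS - rows_per_rg * (num_rg - 1) : rows_per_rg;
  parquet::RowGroupWriter* rg = file_writer->AppendRowGroup(rows_this_rg);
  writecolswithindexbf(rows_this_rg, rg, fpp, 1, 1, 1.1f, 1.1111111, 124);
}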
+    parquet::RowGroupWriter* rg_writer;
+    for (int i = 0; i < num_rg; i++) {
+      rg_writer = file_writer->AppendRowGroup(NUM_ROWS / num_rg);
+      writecolswithindexbfunsorted(NUM_ROWS / num_rg, rg_writer, fpp, 1, 1, 1.1f,
+                                   1.1111111, 124);
+    }
+
+    // Close the ParquetFileWriter
+    file_writer->CloseWithIndex(true, true);
+
+    // Write the bytes to file
+    DCHECK(out_file->Close().ok());
+  } catch (const std::exception& e) {
+    std::cerr << "Parquet write error: " << e.what() << std::endl;
+    // return -1;
+  }
+}
+
+void writecolswithoutindexbf(int NUM_ROWS_PER_ROW_GROUP,
+                             parquet::RowGroupWriter*& rg_writer, int32_t int32factor,
+                             int64_t int64factor, float float_factor,
+                             double double_factor, int FIXED_LENGTH) {
+  uint32_t num_bytes = 0;
+  // rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP, num_bytes);
+
+  // Write the Int32 column
+  parquet::Int32Writer* int32_writer =
+      static_cast<parquet::Int32Writer*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    int32_t value = i * int32factor;
+    int32_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes int32 " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Int64 column.
+  parquet::Int64Writer* int64_writer =
+      static_cast<parquet::Int64Writer*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    int64_t value = i * int64factor;
+    int64_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes int64 " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Float column
+  parquet::FloatWriter* float_writer =
+      static_cast<parquet::FloatWriter*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    float value = static_cast<float>(i) * float_factor;
+    float_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes float " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the Double column
+  parquet::DoubleWriter* double_writer =
+      static_cast<parquet::DoubleWriter*>(rg_writer->NextColumn());
+
+  for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
+    double value = i * double_factor;
+    double_writer->WriteBatch(1, nullptr, nullptr, &value, true);
+    // rg_writer->AppendRowGroupBloomFilter(value);
+  }
+  std::cout << "number of bytes double " << num_bytes / NUM_ROWS_PER_ROW_GROUP << std::endl;
+
+  // Write the ByteArray column.
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumn()); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetwithoutindexbf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_WOIBF-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+ parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolswithoutindexbf(NUM_ROWS/num_rg,rg_writer,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(false,false); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writecolsonlyindex(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp, int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + //rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. + parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. 
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,true,false,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + //rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetonlyindex(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_only-index-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+ parquet::RowGroupWriter* rg_writer; + for ( int i=0; i < num_rg; i++) { + rg_writer = file_writer->AppendRowGroup(NUM_ROWS/num_rg); + writecolsonlyindex(NUM_ROWS/num_rg,rg_writer,fpp,1,1,1.1f,1.1111111,124); + } + // Close the ParquetFileWriter + file_writer->CloseWithIndex(true,false); + + // Write the bytes to file + DCHECK(out_file->Close().ok()); + } catch (const std::exception& e) { + std::cerr << "Parquet write error: " << e.what() << std::endl; + //return -1; + } +} + +void writecolsonlybf(int NUM_ROWS_PER_ROW_GROUP,parquet::RowGroupWriter*& rg_writer,float fpp,int32_t int32factor,int64_t int64factor, float float_factor,double double_factor,int FIXED_LENGTH){ + uint32_t num_bytes = 0; + rg_writer->InitBloomFilter(NUM_ROWS_PER_ROW_GROUP,num_bytes,fpp); + + // Write the Int32 column + parquet::Int32Writer* int32_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int32_t value = i*int32factor; + int32_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int32 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Int64 column. Each row has not[repeats twice]. + parquet::Int64Writer* int64_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + int64_t value = i*int64factor; + int64_writer->WriteBatch(1, nullptr,nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes int64 " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + + // Write the Float column + parquet::FloatWriter* float_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + float value = static_cast(i) * float_factor;//1.1f; + float_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes float " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the Double column + parquet::DoubleWriter* double_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + double value = i * double_factor;//1.1111111; + double_writer->WriteBatch(1, nullptr, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(value); + } + std::cout << "number of bytes double " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + + // Write the ByteArray column. 
Make every alternate values NULL + parquet::ByteArrayWriter* ba_writer = + static_cast(rg_writer->NextColumnWithIndex(num_bytes,false,true,fpp)); + + for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { + parquet::ByteArray value; + char hello[FIXED_LENGTH];// = "parquet"; + int64_t startnumber = i; + for ( int ci = 0; ci < FIXED_LENGTH; ci++ ) { + hello[FIXED_LENGTH-ci-1] = (startnumber%10) + 48; + startnumber /= 10; + } + hello[FIXED_LENGTH] = '\0'; + std::string test(hello); + // if (i % 2 == 0) { + int16_t definition_level = 1; + value.ptr = reinterpret_cast(test.c_str()); + value.len = test.size(); + ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + rg_writer->AppendRowGroupBloomFilter(&value); + // } else { + // int16_t definition_level = 1; + // value.ptr = reinterpret_cast(&hello[0]); + // value.len = FIXED_LENGTH; + // ba_writer->WriteBatch(1, &definition_level, nullptr, &value, true); + // rg_writer->AppendRowGroupBloomFilter(&value); + // } + } + std::cout << "number of bytes bytearray " << num_bytes/NUM_ROWS_PER_ROW_GROUP << std::endl; + +} + +void writeparquetonlybf(int NUM_ROWS, int num_rg, float fpp) { + const char* filename_1 = "parquet_cpp_example_"; + std::string s1(std::to_string(NUM_ROWS)+"_"); + const char* filename_2 = s1.c_str(); + std::string s2(std::to_string(num_rg)); + const char* filename_4 = s2.c_str(); + const char* filename_3 = "_only-bf-sorted.parquet"; + + char PARQUET_FILENAME[strlen(filename_1) + strlen(filename_2) + strlen(filename_4) + strlen(filename_3)]; + strcpy(PARQUET_FILENAME,filename_1); + strcat(PARQUET_FILENAME,filename_2); + strcat(PARQUET_FILENAME,filename_4); + strcat(PARQUET_FILENAME,filename_3); + /********************************************************************************** + PARQUET WRITER EXAMPLE + **********************************************************************************/ + // parquet::REQUIRED fields do not need definition and repetition level values + // parquet::OPTIONAL fields require only definition level values + // parquet::REPEATED fields require both definition and repetition level values + try { + // Create a local file output stream instance. + using FileClass = ::arrow::io::FileOutputStream; + std::shared_ptr out_file; + PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + + // Setup the parquet schema + std::shared_ptr schema = SetupSchema(); + + // Add writer properties + parquet::WriterProperties::Builder builder; + builder.compression(parquet::Compression::UNCOMPRESSED); + std::shared_ptr props = builder.build(); + + // Create a ParquetFileWriter instance + std::shared_ptr file_writer = + parquet::ParquetFileWriter::Open(out_file, schema, props); + + // Append a RowGroup with a specific number of rows. 
+    parquet::RowGroupWriter* rg_writer;
+    for (int i = 0; i < num_rg; i++) {
+      rg_writer = file_writer->AppendRowGroup(NUM_ROWS / num_rg);
+      writecolsonlybf(NUM_ROWS / num_rg, rg_writer, fpp, 1, 1, 1.1f, 1.1111111, 124);
+    }
+    // Close the ParquetFileWriter
+    file_writer->CloseWithIndex(false, true);
+
+    // Write the bytes to file
+    DCHECK(out_file->Close().ok());
+  } catch (const std::exception& e) {
+    std::cerr << "Parquet write error: " << e.what() << std::endl;
+    // return -1;
+  }
+}
+
+int main(int argc, char** argv) {
+  if (argc == 4) {
+    int NUM_ROWS = atoi(argv[1]);
+    int num_rg = atoi(argv[2]);
+    float fpp = static_cast<float>(atof(argv[3]));
+    // writeparquetwithoutindexbf(NUM_ROWS, num_rg, fpp);
+    // writeparquetonlyindex(NUM_ROWS, num_rg, fpp);
+    // writeparquetonlybf(NUM_ROWS, num_rg, fpp);
+    writeparquetwithindexbfunsorted(NUM_ROWS, num_rg, fpp);
+    std::cout << "Parquet Writing Complete" << std::endl;
+  } else {
+    std::cerr << "Usage: parquet-writer-with-pageindex <num_rows> <num_row_groups> <fpp>"
+              << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
new file mode 100644
index 00000000000..58072b61381
--- /dev/null
+++ b/cpp/src/arrow/util/io-util.cc
@@ -0,0 +1,983 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
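A note on the block that opens this file: `_FILE_OFFSET_BITS` only takes effect if it is defined before the first system header is included, which is why it precedes every `#include` below. On a 32-bit glibc build, a misordered include silently reverts `off_t` to 32 bits; a compile-time guard such as this (POSIX-only, illustrative, not part of the patch) would catch that:

#include <sys/types.h>

// Fails the build if large-file support did not take effect.
static_assert(sizeof(off_t) == 8, "io-util.cc requires 64-bit file offsets");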
+ +// Ensure 64-bit off_t for platforms where it matters +#ifdef _FILE_OFFSET_BITS +#undef _FILE_OFFSET_BITS +#endif + +#define _FILE_OFFSET_BITS 64 + +#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include // IWYU pragma: keep + +// Defines that don't exist in MinGW +#if defined(__MINGW32__) +#define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR +#elif defined(_MSC_VER) // Visual Studio + +#else // gcc / clang on POSIX platforms +#define ARROW_WRITE_SHMODE S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH +#endif + +#include + +// ---------------------------------------------------------------------- +// file compatibility stuff + +#if defined(_WIN32) +#include +#include +#endif + +#ifdef _WIN32 // Windows +#include "arrow/io/mman.h" +#undef Realloc +#undef Free +#else // POSIX-like platforms +#include +#include +#endif + +// define max read/write count +#if defined(_WIN32) +#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX +#else + +#ifdef __APPLE__ +// due to macOS bug, we need to set read/write max +#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX +#else +// see notes on Linux read/write manpage +#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000 +#endif + +#endif + +#include "arrow/buffer.h" +#include "arrow/util/io-util.h" +#include "arrow/util/logging.h" + +// For filename conversion +#if defined(_WIN32) +#include "arrow/util/utf8.h" +#endif + +namespace arrow { +namespace io { + +// +// StdoutStream implementation +// + +StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); } + +Status StdoutStream::Close() { return Status::OK(); } + +bool StdoutStream::closed() const { return false; } + +Status StdoutStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StdoutStream::Write(const void* data, int64_t nbytes) { + pos_ += nbytes; + std::cout.write(reinterpret_cast(data), nbytes); + return Status::OK(); +} + +// +// StderrStream implementation +// + +StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); } + +Status StderrStream::Close() { return Status::OK(); } + +bool StderrStream::closed() const { return false; } + +Status StderrStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StderrStream::Write(const void* data, int64_t nbytes) { + pos_ += nbytes; + std::cerr.write(reinterpret_cast(data), nbytes); + return Status::OK(); +} + +// +// StdinStream implementation +// + +StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); } + +Status StdinStream::Close() { return Status::OK(); } + +bool StdinStream::closed() const { return false; } + +Status StdinStream::Tell(int64_t* position) const { + *position = pos_; + return Status::OK(); +} + +Status StdinStream::Read(int64_t nbytes, int64_t* bytes_read, void* out) { + std::cin.read(reinterpret_cast(out), nbytes); + if (std::cin) { + *bytes_read = nbytes; + pos_ += nbytes; + } else { + *bytes_read = 0; + } + return Status::OK(); +} + +Status StdinStream::Read(int64_t nbytes, std::shared_ptr* out) { + std::shared_ptr buffer; + ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer)); + int64_t bytes_read; + ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data())); + ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); + buffer->ZeroPadding(); + *out = buffer; + return Status::OK(); +} + +} // namespace io + +namespace internal { + +namespace bfs = ::boost::filesystem; + +namespace { + 
+Status StringToNative(const std::string& s, NativePathString* out) { +#if _WIN32 + std::wstring ws; + RETURN_NOT_OK(::arrow::util::UTF8ToWideString(s, &ws)); + *out = std::move(ws); +#else + *out = s; +#endif + return Status::OK(); +} + +} // namespace + +#define BOOST_FILESYSTEM_TRY try { +#define BOOST_FILESYSTEM_CATCH \ + } \ + catch (bfs::filesystem_error & _err) { \ + return ToStatus(_err); \ + } + +// NOTE: catching filesystem_error gives more context than system::error_code +// (it includes the file path(s) in the error message) + +static Status ToStatus(const bfs::filesystem_error& err) { + return Status::IOError(err.what()); +} + +static std::string MakeRandomName(int num_chars) { + static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz"; + std::random_device gen; + std::uniform_int_distribution dist(0, static_cast(chars.length() - 1)); + + std::string s; + s.reserve(num_chars); + for (int i = 0; i < num_chars; ++i) { + s += chars[dist(gen)]; + } + return s; +} + +std::string ErrnoMessage(int errnum) { return std::strerror(errnum); } + +#if _WIN32 +std::string WinErrorMessage(int errnum) { + char buf[1024]; + auto nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errnum, 0, buf, sizeof(buf), NULL); + if (nchars == 0) { + // Fallback + std::stringstream ss; + ss << "Windows error #" << errnum; + return ss.str(); + } + return std::string(buf, nchars); +} +#endif + +// +// PlatformFilename implementation +// + +struct PlatformFilename::Impl { + Impl() = default; + explicit Impl(bfs::path p) : path(p.make_preferred()) {} + + bfs::path path; +}; + +PlatformFilename::PlatformFilename() : impl_(new Impl{}) {} + +PlatformFilename::~PlatformFilename() {} + +PlatformFilename::PlatformFilename(const Impl& impl) : impl_(new Impl(impl)) {} + +PlatformFilename::PlatformFilename(Impl&& impl) : impl_(new Impl(std::move(impl))) {} + +PlatformFilename::PlatformFilename(const PlatformFilename& other) + : PlatformFilename(Impl{other.impl_->path}) {} + +PlatformFilename::PlatformFilename(PlatformFilename&& other) + : impl_(std::move(other.impl_)) {} + +PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) { + this->impl_.reset(new Impl{other.impl_->path}); + return *this; +} + +PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) { + this->impl_ = std::move(other.impl_); + return *this; +} + +PlatformFilename::PlatformFilename(const NativePathString& path) + : PlatformFilename(Impl{path}) {} + +const NativePathString& PlatformFilename::ToNative() const { + return impl_->path.native(); +} + +std::string PlatformFilename::ToString() const { +#if _WIN32 + std::wstring ws = impl_->path.generic_wstring(); + std::string s; + Status st = ::arrow::util::WideStringToUTF8(ws, &s); + if (!st.ok()) { + std::stringstream ss; + ss << ""; + return ss.str(); + } + return s; +#else + return impl_->path.generic_string(); +#endif +} + +Status PlatformFilename::FromString(const std::string& file_name, PlatformFilename* out) { + if (file_name.find_first_of('\0') != std::string::npos) { + return Status::Invalid("Embedded NUL char in file name: '", file_name, "'"); + } + NativePathString ns; + RETURN_NOT_OK(StringToNative(file_name, &ns)); + *out = PlatformFilename(std::move(ns)); + return Status::OK(); +} + +Status PlatformFilename::Join(const std::string& child_name, + PlatformFilename* out) const { + NativePathString ns; + RETURN_NOT_OK(StringToNative(child_name, &ns)); + auto path = impl_->path / ns; + *out = 
PlatformFilename(Impl{std::move(path)}); + return Status::OK(); +} + +Status CreateDir(const PlatformFilename& dir_path, bool* created) { + bool res; + BOOST_FILESYSTEM_TRY + res = bfs::create_directory(dir_path.impl_->path); + BOOST_FILESYSTEM_CATCH + if (created) { + *created = res; + } + return Status::OK(); +} + +Status CreateDirTree(const PlatformFilename& dir_path, bool* created) { + bool res; + BOOST_FILESYSTEM_TRY + res = bfs::create_directories(dir_path.impl_->path); + BOOST_FILESYSTEM_CATCH + if (created) { + *created = res; + } + return Status::OK(); +} + +Status DeleteDirTree(const PlatformFilename& dir_path, bool* deleted) { + BOOST_FILESYSTEM_TRY + const auto& path = dir_path.impl_->path; + // XXX There is a race here. + auto st = bfs::symlink_status(path); + if (st.type() != bfs::file_not_found && st.type() != bfs::directory_file) { + return Status::IOError("Cannot delete non -directory '", path.string(), "'"); + } + auto n_removed = bfs::remove_all(path); + if (deleted) { + *deleted = n_removed != 0; + } + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +Status DeleteFile(const PlatformFilename& file_path, bool* deleted) { + BOOST_FILESYSTEM_TRY + bool res = false; + const auto& path = file_path.impl_->path; + // XXX There is a race here, and boost::filesystem doesn't allow deleting + // only files and not empty directories. + auto st = bfs::symlink_status(path); + if (!bfs::is_directory(st)) { + res = bfs::remove(path); + } else { + return Status::IOError("Cannot delete directory '", path.string(), "'"); + } + if (deleted) { + *deleted = res; + } + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +Status FileExists(const PlatformFilename& path, bool* out) { + BOOST_FILESYSTEM_TRY + *out = bfs::exists(path.impl_->path); + BOOST_FILESYSTEM_CATCH + return Status::OK(); +} + +// +// File name handling +// + +Status FileNameFromString(const std::string& file_name, PlatformFilename* out) { + return PlatformFilename::FromString(file_name, out); +} + +// +// Functions for creating file descriptors +// + +#define CHECK_LSEEK(retval) \ + if ((retval) == -1) return Status::IOError("lseek failed"); + +static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) { +#if defined(_WIN32) + return _lseeki64(fd, pos, whence); +#else + return lseek(fd, pos, whence); +#endif +} + +static inline Status CheckFileOpResult(int ret, int errno_actual, + const PlatformFilename& file_name, + const char* opname) { + if (ret == -1) { +#ifdef _WIN32 + int winerr = GetLastError(); + if (winerr != ERROR_SUCCESS) { + return Status::IOError("Failed to ", opname, " file '", file_name.ToString(), + "', error: ", WinErrorMessage(winerr)); + } +#endif + return Status::IOError("Failed to ", opname, " file '", file_name.ToString(), + "', error: ", ErrnoMessage(errno_actual)); + } + return Status::OK(); +} + +Status FileOpenReadable(const PlatformFilename& file_name, int* fd) { + int ret, errno_actual; +#if defined(_WIN32) + SetLastError(0); + errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), + _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD); + ret = *fd; +#else + ret = *fd = open(file_name.ToNative().c_str(), O_RDONLY); + errno_actual = errno; + + if (ret >= 0) { + // open(O_RDONLY) succeeds on directories, check for it + struct stat st; + ret = fstat(*fd, &st); + if (ret == -1) { + ARROW_UNUSED(FileClose(*fd)); + // Will propagate error below + } else if (S_ISDIR(st.st_mode)) { + ARROW_UNUSED(FileClose(*fd)); + return Status::IOError("Cannot open for reading: path '", 
file_name.ToString(), + "' is a directory"); + } + } +#endif + + return CheckFileOpResult(ret, errno_actual, file_name, "open local"); +} + +Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool truncate, + bool append, int* fd) { + int ret, errno_actual; + +#if defined(_WIN32) + SetLastError(0); + int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT; + int pmode = _S_IREAD | _S_IWRITE; + + if (truncate) { + oflag |= _O_TRUNC; + } + if (append) { + oflag |= _O_APPEND; + } + + if (write_only) { + oflag |= _O_WRONLY; + } else { + oflag |= _O_RDWR; + } + + errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), oflag, _SH_DENYNO, pmode); + ret = *fd; + +#else + int oflag = O_CREAT; + + if (truncate) { + oflag |= O_TRUNC; + } + if (append) { + oflag |= O_APPEND; + } + + if (write_only) { + oflag |= O_WRONLY; + } else { + oflag |= O_RDWR; + } + + ret = *fd = open(file_name.ToNative().c_str(), oflag, ARROW_WRITE_SHMODE); + errno_actual = errno; +#endif + RETURN_NOT_OK(CheckFileOpResult(ret, errno_actual, file_name, "open local")); + if (append) { + // Seek to end, as O_APPEND does not necessarily do it + auto ret = lseek64_compat(*fd, 0, SEEK_END); + if (ret == -1) { + ARROW_UNUSED(FileClose(*fd)); + return Status::IOError("lseek failed"); + } + } + return Status::OK(); +} + +Status FileTell(int fd, int64_t* pos) { + int64_t current_pos; + +#if defined(_WIN32) + current_pos = _telli64(fd); + if (current_pos == -1) { + return Status::IOError("_telli64 failed"); + } +#else + current_pos = lseek64_compat(fd, 0, SEEK_CUR); + CHECK_LSEEK(current_pos); +#endif + + *pos = current_pos; + return Status::OK(); +} + +Status CreatePipe(int fd[2]) { + int ret; +#if defined(_WIN32) + ret = _pipe(fd, 4096, _O_BINARY); +#else + ret = pipe(fd); +#endif + + if (ret == -1) { + return Status::IOError("Error creating pipe: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +static Status StatusFromErrno(const char* prefix) { +#ifdef _WIN32 + errno = __map_mman_error(GetLastError(), EPERM); +#endif + return Status::IOError(prefix, ErrnoMessage(errno)); +} + +// +// Compatible way to remap a memory map +// + +Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, + void** new_addr) { + // should only be called with writable files + *new_addr = MAP_FAILED; +#ifdef _WIN32 + // flags are ignored on windows + HANDLE fm, h; + + if (!UnmapViewOfFile(addr)) { + return StatusFromErrno("UnmapViewOfFile failed: "); + } + + h = reinterpret_cast(_get_osfhandle(fildes)); + if (h == INVALID_HANDLE_VALUE) { + return StatusFromErrno("Cannot get file handle: "); + } + + uint64_t new_size64 = new_size; + LONG new_size_low = static_cast(new_size64 & 0xFFFFFFFFUL); + LONG new_size_high = static_cast((new_size64 >> 32) & 0xFFFFFFFFUL); + + SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN); + SetEndOfFile(h); + fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, ""); + if (fm == NULL) { + return StatusFromErrno("CreateFileMapping failed: "); + } + *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size); + CloseHandle(fm); + if (new_addr == NULL) { + return StatusFromErrno("MapViewOfFile failed: "); + } + return Status::OK(); +#else +#ifdef __APPLE__ + // we have to close the mmap first, truncate the file to the new size + // and recreate the mmap + if (munmap(addr, old_size) == -1) { + return StatusFromErrno("munmap failed: "); + } + if (ftruncate(fildes, new_size) == -1) { + return StatusFromErrno("ftruncate failed: "); + } + // we set READ / WRITE flags on the new 
map, since we could only have + // unlarged a RW map in the first place + *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mmap failed: "); + } + return Status::OK(); +#else + if (ftruncate(fildes, new_size) == -1) { + return StatusFromErrno("ftruncate failed: "); + } + *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE); + if (*new_addr == MAP_FAILED) { + return StatusFromErrno("mremap failed: "); + } + return Status::OK(); +#endif +#endif +} + +// +// Closing files +// + +Status FileClose(int fd) { + int ret; + +#if defined(_WIN32) + ret = static_cast(_close(fd)); +#else + ret = static_cast(close(fd)); +#endif + + if (ret == -1) { + return Status::IOError("error closing file"); + } + return Status::OK(); +} + +// +// Seeking and telling +// + +Status FileSeek(int fd, int64_t pos, int whence) { + int64_t ret = lseek64_compat(fd, pos, whence); + CHECK_LSEEK(ret); + return Status::OK(); +} + +Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); } + +Status FileGetSize(int fd, int64_t* size) { +#if defined(_WIN32) + struct __stat64 st; +#else + struct stat st; +#endif + st.st_size = -1; + +#if defined(_WIN32) + int ret = _fstat64(fd, &st); +#else + int ret = fstat(fd, &st); +#endif + + if (ret == -1) { + return Status::IOError("error stat()ing file"); + } + if (st.st_size == 0) { + // Maybe the file doesn't support getting its size, double-check by + // trying to tell() (seekable files usually have a size, while + // non-seekable files don't) + int64_t position; + RETURN_NOT_OK(FileTell(fd, &position)); + } else if (st.st_size < 0) { + return Status::IOError("error getting file size"); + } + *size = st.st_size; + return Status::OK(); +} + +// +// Reading data +// + +static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) { +#if defined(_WIN32) + HANDLE handle = reinterpret_cast(_get_osfhandle(fd)); + DWORD dwBytesRead = 0; + OVERLAPPED overlapped = {0}; + overlapped.Offset = static_cast(pos); + overlapped.OffsetHigh = static_cast(pos >> 32); + + // Note: ReadFile() will update the file position + BOOL bRet = + ReadFile(handle, buf, static_cast(nbytes), &dwBytesRead, &overlapped); + if (bRet || GetLastError() == ERROR_HANDLE_EOF) { + return dwBytesRead; + } else { + return -1; + } +#else + return static_cast( + pread(fd, buf, static_cast(nbytes), static_cast(pos))); +#endif +} + +Status FileRead(int fd, uint8_t* buffer, int64_t nbytes, int64_t* bytes_read) { + *bytes_read = 0; + + while (*bytes_read < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); +#if defined(_WIN32) + int64_t ret = + static_cast(_read(fd, buffer, static_cast(chunksize))); +#else + int64_t ret = static_cast(read(fd, buffer, static_cast(chunksize))); +#endif + + if (ret == -1) { + *bytes_read = ret; + break; + } + if (ret == 0) { + // EOF + break; + } + buffer += ret; + *bytes_read += ret; + } + + if (*bytes_read == -1) { + return Status::IOError("Error reading bytes from file: ", ErrnoMessage(errno)); + } + + return Status::OK(); +} + +Status FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes, + int64_t* bytes_read) { + *bytes_read = 0; + + while (*bytes_read < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); + int64_t ret = pread_compat(fd, buffer, chunksize, position); + + if (ret == -1) { + *bytes_read = ret; + break; + } + if (ret == 0) { + // EOF + 
break; + } + buffer += ret; + position += ret; + *bytes_read += ret; + } + + if (*bytes_read == -1) { + return Status::IOError("Error reading bytes from file: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +// +// Writing data +// + +Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) { + int ret = 0; + int64_t bytes_written = 0; + + while (ret != -1 && bytes_written < nbytes) { + int64_t chunksize = + std::min(static_cast(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written); +#if defined(_WIN32) + ret = static_cast( + _write(fd, buffer + bytes_written, static_cast(chunksize))); +#else + ret = static_cast( + write(fd, buffer + bytes_written, static_cast(chunksize))); +#endif + + if (ret != -1) { + bytes_written += ret; + } + } + + if (ret == -1) { + return Status::IOError("Error writing bytes to file: ", ErrnoMessage(errno)); + } + return Status::OK(); +} + +Status FileTruncate(int fd, const int64_t size) { + int ret, errno_actual; + +#ifdef _WIN32 + errno_actual = _chsize_s(fd, static_cast(size)); + ret = errno_actual == 0 ? 0 : -1; +#else + ret = ftruncate(fd, static_cast(size)); + errno_actual = errno; +#endif + + if (ret == -1) { + return Status::IOError("Error writing bytes to file: ", ErrnoMessage(errno_actual)); + } + return Status::OK(); +} + +// +// Environment variables +// + +Status GetEnvVar(const char* name, std::string* out) { +#ifdef _WIN32 + // On Windows, getenv() reads an early copy of the process' environment + // which doesn't get updated when SetEnvironmentVariable() is called. + constexpr int32_t bufsize = 2000; + char c_str[bufsize]; + auto res = GetEnvironmentVariableA(name, c_str, bufsize); + if (res >= bufsize) { + return Status::CapacityError("environment variable value too long"); + } else if (res == 0) { + return Status::KeyError("environment variable undefined"); + } + *out = std::string(c_str); + return Status::OK(); +#else + char* c_str = getenv(name); + if (c_str == nullptr) { + return Status::KeyError("environment variable undefined"); + } + *out = std::string(c_str); + return Status::OK(); +#endif +} + +Status GetEnvVar(const std::string& name, std::string* out) { + return GetEnvVar(name.c_str(), out); +} + +Status SetEnvVar(const char* name, const char* value) { +#ifdef _WIN32 + if (SetEnvironmentVariableA(name, value)) { + return Status::OK(); + } else { + return Status::Invalid("failed setting environment variable"); + } +#else + if (setenv(name, value, 1) == 0) { + return Status::OK(); + } else { + return Status::Invalid("failed setting environment variable"); + } +#endif +} + +Status SetEnvVar(const std::string& name, const std::string& value) { + return SetEnvVar(name.c_str(), value.c_str()); +} + +Status DelEnvVar(const char* name) { +#ifdef _WIN32 + if (SetEnvironmentVariableA(name, nullptr)) { + return Status::OK(); + } else { + return Status::Invalid("failed deleting environment variable"); + } +#else + if (unsetenv(name) == 0) { + return Status::OK(); + } else { + return Status::Invalid("failed deleting environment variable"); + } +#endif +} + +Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); } + +TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {} + +TemporaryDir::~TemporaryDir() { + Status st = DeleteDirTree(path_); + if (!st.ok()) { + ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st; + } +} + +Status TemporaryDir::Make(const std::string& prefix, std::unique_ptr* out) { + bfs::path path; + std::string suffix = MakeRandomName(8); + + 
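The `BOOST_FILESYSTEM_TRY` / `BOOST_FILESYSTEM_CATCH` pair defined earlier in this file simply brackets the enclosed statements in a try/catch that converts `bfs::filesystem_error` into an `arrow::Status`. Expanded, the section just below is equivalent to:

try {
  path = bfs::temp_directory_path() / (prefix + suffix);
  path += "/";
} catch (bfs::filesystem_error& _err) {
  return ToStatus(_err);  // IOError carrying the offending path(s)
}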
BOOST_FILESYSTEM_TRY + path = bfs::temp_directory_path() / (prefix + suffix); + path += "/"; + BOOST_FILESYSTEM_CATCH + + PlatformFilename fn(path.native()); + bool created = false; + RETURN_NOT_OK(CreateDir(fn, &created)); + if (!created) { + // XXX Should we retry? + return Status::IOError("Path already exists: '", fn.ToString(), "'"); + } + out->reset(new TemporaryDir(std::move(fn))); + return Status::OK(); +} + +SignalHandler::SignalHandler() : SignalHandler(static_cast(nullptr)) {} + +SignalHandler::SignalHandler(Callback cb) { +#if ARROW_HAVE_SIGACTION + sa_.sa_handler = cb; + sa_.sa_flags = 0; + sigemptyset(&sa_.sa_mask); +#else + cb_ = cb; +#endif +} + +#if ARROW_HAVE_SIGACTION +SignalHandler::SignalHandler(const struct sigaction& sa) { + memcpy(&sa_, &sa, sizeof(sa)); +} +#endif + +SignalHandler::Callback SignalHandler::callback() const { +#if ARROW_HAVE_SIGACTION + return sa_.sa_handler; +#else + return cb_; +#endif +} + +#if ARROW_HAVE_SIGACTION +const struct sigaction& SignalHandler::action() const { return sa_; } +#endif + +Status GetSignalHandler(int signum, SignalHandler* out) { +#if ARROW_HAVE_SIGACTION + struct sigaction sa; + int ret = sigaction(signum, nullptr, &sa); + if (ret != 0) { + // TODO more detailed message using errno + return Status::IOError("sigaction call failed"); + } + *out = SignalHandler(sa); +#else + // To read the old handler, set the signal handler to something else temporarily + SignalHandler::Callback cb = signal(signum, SIG_IGN); + if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) { + // TODO more detailed message using errno + return Status::IOError("signal call failed"); + } + *out = SignalHandler(cb); +#endif + return Status::OK(); +} + +Status SetSignalHandler(int signum, const SignalHandler& handler, + SignalHandler* old_handler) { +#if ARROW_HAVE_SIGACTION + struct sigaction old_sa; + int ret = sigaction(signum, &handler.action(), &old_sa); + if (ret != 0) { + // TODO more detailed message using errno + return Status::IOError("sigaction call failed"); + } + if (old_handler != nullptr) { + *old_handler = SignalHandler(old_sa); + } +#else + SignalHandler::Callback cb = signal(signum, handler.callback()); + if (cb == SIG_ERR) { + // TODO more detailed message using errno + return Status::IOError("signal call failed"); + } + if (old_handler != nullptr) { + *old_handler = SignalHandler(cb); + } +#endif + return Status::OK(); +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 779c6c62f0b..ab1b88b3f4c 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -25,6 +25,7 @@ #include "parquet/hasher.h" #include "parquet/platform.h" #include "parquet/types.h" +#include "parquet/parquet_types.h" namespace parquet { @@ -57,6 +58,12 @@ class PARQUET_EXPORT BloomFilter { /// Get the number of bytes of bitset virtual uint32_t GetBitsetSize() const = 0; + virtual format::BloomFilterHash GetHashStrategy() const = 0; + + virtual format::BloomFilterAlgorithm GetHashAlgorithm() const = 0; + + virtual format::BloomFilterCompression GetBFCompression() const = 0; + /// Compute hash for 32 bits value by using its plain encoding result. /// /// @param value the value to hash. 
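These three accessors expose the thrift enums a page writer needs when it serializes a filter's header next to the bitset. A sketch of that use, assuming thrift-generated setters named as in parquet.thrift; the exact `format::BloomFilterHeader` field names are an assumption, not taken from this patch:

// Hypothetical: populate a thrift header from a filter before writing it out.
void FillBloomFilterHeader(const parquet::BloomFilter& bf,
                           parquet::format::BloomFilterHeader* header) {
  header->__set_numBytes(bf.GetBitsetSize());
  header->__set_hash(bf.GetHashStrategy());
  header->__set_algorithm(bf.GetHashAlgorithm());
  header->__set_compression(bf.GetBFCompression());
}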
@@ -184,6 +191,12 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   void WriteTo(ArrowOutputStream* sink) const override;
 
   uint32_t GetBitsetSize() const override { return num_bytes_; }
 
+  format::BloomFilterHash GetHashStrategy() const override { return bfhash_; }
+
+  format::BloomFilterAlgorithm GetHashAlgorithm() const override { return bfalgorithm_; }
+
+  format::BloomFilterCompression GetBFCompression() const override { return bfcompression_; }
+
   uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); }
   uint64_t Hash(float value) const override { return hasher_->Hash(value); }
   uint64_t Hash(double value) const override { return hasher_->Hash(value); }
@@ -239,6 +252,12 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
   // Algorithm used in this Bloom filter.
   Algorithm algorithm_;
 
+  format::BloomFilterCompression bfcompression_;
+
+  format::BloomFilterAlgorithm bfalgorithm_;
+
+  format::BloomFilterHash bfhash_;
+
   // The hash pointer points to actual hash class used.
   std::unique_ptr<Hasher> hasher_;
 };
diff --git a/cpp/src/parquet/bloom_filter_test.cc b/cpp/src/parquet/bloom_filter_test.cc
index 23aa4a58018..eb05d63485a 100644
--- a/cpp/src/parquet/bloom_filter_test.cc
+++ b/cpp/src/parquet/bloom_filter_test.cc
@@ -148,6 +148,65 @@ TEST(FPPTest, TestBloomFilter) {
   EXPECT_LT(exist, total_count * fpp);
 }
 
+
+TEST(HashTest, TestBloomFilter) {
+  // Records where FindHash locates the probe value among the members.
+  int exist = 0;
+
+  // Total count of elements that will be used
+#ifdef PARQUET_VALGRIND
+  const int total_count = 5000;
+#else
+  const int total_count = 10000000;
+#endif
+
+  // Bloom filter fpp parameter
+  const double fpp = 0.01;
+  std::vector<std::string> members;
+  BlockSplitBloomFilter bloom_filter;
+  bloom_filter.Init(BlockSplitBloomFilter::OptimalNumOfBits(total_count, fpp));
+  const int FIXED_LENGTH_data = 124;
+
+  for (int i = 0; i < total_count; i++) {
+    parquet::ByteArray value;
+    char hello[FIXED_LENGTH_data + 1];  // room for the terminating NUL
+    int64_t startnumber = i * FIXED_LENGTH_data;
+    for (int ci = 0; ci < FIXED_LENGTH_data; ci++) {
+      hello[FIXED_LENGTH_data - ci - 1] = (startnumber % 10) + '0';
+      startnumber /= 10;
+    }
+    hello[FIXED_LENGTH_data] = '\0';
+    std::string test(hello);
+    value.ptr = reinterpret_cast<const uint8_t*>(test.c_str());
+    value.len = test.size();
+    members.push_back(test);
+    bloom_filter.InsertHash(bloom_filter.Hash(&value));
+  }
+  const uint32_t FIXED_LENGTH = 124;
+  char dest[FIXED_LENGTH + 1];  // room for the terminating NUL
+  for (uint32_t i = 0; i < (FIXED_LENGTH - 7); i++) dest[i] = '0';
+  dest[FIXED_LENGTH - 7] = '4';
+  dest[FIXED_LENGTH - 6] = '2';
+  dest[FIXED_LENGTH - 5] = '3';
+  dest[FIXED_LENGTH - 4] = '3';
+  dest[FIXED_LENGTH - 3] = '5';
+  dest[FIXED_LENGTH - 2] = '5';
+  dest[FIXED_LENGTH - 1] = '2';
+  dest[FIXED_LENGTH] = '\0';
+  std::string test(dest);
+  ByteArray pba(test.size(), reinterpret_cast<const uint8_t*>(test.c_str()));
+
+  if (bloom_filter.FindHash(bloom_filter.Hash(&pba))) {
+    for (; exist < (int)members.size() && test.compare(members[exist]) != 0; exist++) {
+    }
+  }
+
+  // exist should probably stay small, given the default FPP of 0.01.
+  EXPECT_EQ(exist, 759949);
+}
+
 // The CompatibilityTest is used to test cross compatibility with parquet-mr, it reads
 // the Bloom filter binary generated by the Bloom filter class in the parquet-mr project
 // and tests whether the values inserted before could be filtered or not.
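For reference, the round trip this test exercises reduces to three calls: `Init` with a bit count from `OptimalNumOfBits`, `InsertHash` for every member, and `FindHash` to probe. A minimal membership probe, using only APIs that appear in the test above:

#include <string>

#include "parquet/bloom_filter.h"
#include "parquet/types.h"

// Returns true if 'key' is possibly in the filter, false if definitely not.
bool MightContain(parquet::BlockSplitBloomFilter& filter, const std::string& key) {
  parquet::ByteArray value(static_cast<uint32_t>(key.size()),
                           reinterpret_cast<const uint8_t*>(key.data()));
  return filter.FindHash(filter.Hash(&value));
}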
diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 60c44ffa6d2..55ec43097db 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -128,6 +128,12 @@ class PARQUET_EXPORT ColumnReader { virtual Type::type type() const = 0; virtual const ColumnDescriptor* descr() const = 0; + + // Skip reading levels + // Returns the number of levels skipped + virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + + virtual int64_t callReadBatch(int64_t batch_size,void* values,int64_t* values_read) = 0; }; // API to read values from a single column. This is a main client facing API. @@ -197,7 +203,18 @@ class TypedColumnReader : public ColumnReader { // Skip reading levels // Returns the number of levels skipped - virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + // virtual int64_t Skip(int64_t num_rows_to_skip) = 0; + + int64_t callReadBatch(int64_t batch_size,void* value,int64_t* values_read){ + + + int16_t definition_level = 1; + int16_t repetition_level; + + int64_t r = ReadBatch(batch_size, &definition_level, &repetition_level,(T*)value,values_read); + + return r; + } }; namespace internal { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 4be8bb67361..60ad5f5288a 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include "arrow/array.h" #include "arrow/buffer_builder.h" @@ -194,6 +195,7 @@ class SerializedPageWriter : public PageWriter { } compressor_ = GetCodec(codec, compression_level); thrift_serializer_.reset(new ThriftSerializer); + current_page_row_set_index = 0; } int64_t WriteDictionaryPage(const DictionaryPage& page) override { @@ -371,6 +373,125 @@ class SerializedPageWriter : public PageWriter { page_header.__set_data_page_header_v2(data_page_header); } + int64_t WriteDataPagesWithIndex(const CompressedDataPage& page, format::PageLocation& ploc) override { + int64_t uncompressed_size = page.uncompressed_size(); + std::shared_ptr compressed_data = page.buffer(); + + format::DataPageHeader data_page_header; + data_page_header.__set_num_values(page.num_values()); + data_page_header.__set_encoding(ToThrift(page.encoding())); + data_page_header.__set_definition_level_encoding( + ToThrift(page.definition_level_encoding())); + data_page_header.__set_repetition_level_encoding( + ToThrift(page.repetition_level_encoding())); + data_page_header.__set_statistics(ToThrift(page.statistics())); + + format::PageHeader page_header; + page_header.__set_type(format::PageType::DATA_PAGE); + page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + page_header.__set_compressed_page_size(static_cast(compressed_data->size())); + page_header.__set_data_page_header(data_page_header); + // TODO(PARQUET-594) crc checksum + + int64_t start_pos = -1; + PARQUET_THROW_NOT_OK(sink_->Tell(&start_pos)); + if (data_page_offset_ == 0) { + data_page_offset_ = start_pos; + } + + int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_.get()); + PARQUET_THROW_NOT_OK(sink_->Write(compressed_data->data(), compressed_data->size())); + + total_uncompressed_size_ += uncompressed_size + header_size; + total_compressed_size_ += compressed_data->size() + header_size; + num_values_ += page.num_values(); + + int64_t current_pos = -1; + PARQUET_THROW_NOT_OK(sink_->Tell(¤t_pos)); + + ploc.offset = start_pos; + ploc.first_row_index = current_page_row_set_index; + ploc.compressed_page_size = 
page_header.compressed_page_size + (current_pos - start_pos); + current_page_row_set_index += page_header.data_page_header.num_values; + + return current_pos - start_pos; +} + + + /* sample Adding ColumnIndex from chunk to offset. + * Status HdfsParquetTableWriter::WritePageIndex() { + if (!state_->query_options().parquet_write_page_index) return Status::OK(); + + // Currently Impala only write Parquet files with a single row group. The current + // page index logic depends on this behavior as it only keeps one row group's + // statistics in memory. + DCHECK_EQ(file_metadata_.row_groups.size(), 1); + + parquet::RowGroup* row_group = &(file_metadata_.row_groups[0]); + // Write out the column indexes. + for (int i = 0; i < columns_.size(); ++i) { + auto& column = *columns_[i]; // column-writer + if (!column.valid_column_index_) continue; + column.column_index_.__set_boundary_order( + column.row_group_stats_base_->GetBoundaryOrder()); + // We always set null_counts. + column.column_index_.__isset.null_counts = true; + uint8_t* buffer = nullptr; + uint32_t len = 0; + RETURN_IF_ERROR(thrift_serializer_->SerializeToBuffer( + &column.column_index_, &len, &buffer)); + RETURN_IF_ERROR(Write(buffer, len)); + // Update the column_index_offset and column_index_length of the ColumnChunk + row_group->columns[i].__set_column_index_offset(file_pos_); + row_group->columns[i].__set_column_index_length(len); + file_pos_ += len; + } + // Write out the offset indexes. + for (int i = 0; i < columns_.size(); ++i) { + auto& column = *columns_[i]; // column-writer + uint8_t* buffer = nullptr; + uint32_t len = 0; + RETURN_IF_ERROR(thrift_serializer_->SerializeToBuffer( + &column.offset_index_, &len, &buffer)); + RETURN_IF_ERROR(Write(buffer, len)); + // Update the offset_index_offset and offset_index_length of the ColumnChunk + row_group->columns[i].__set_offset_index_offset(file_pos_); + row_group->columns[i].__set_offset_index_length(len); + file_pos_ += len; + } + return Status::OK(); +} + * + */ + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) { + // index_page_offset = -1 since they are not supported + + uint32_t ci_len, oi_len; + uint8_t* buffer; + if (file_pos_ == 0) sink_->Tell(&file_pos_); + thrift_serializer_->SerializeToBuffer(&ci,&ci_len,&buffer); + sink_->Write(buffer,ci_len); + thrift_serializer_->SerializeToBuffer(&oi,&oi_len,&buffer); + sink_->Write(buffer,oi_len); + + if (oi_offset == 0 && ci_offset == 0) { + oi_offset = ci_len; + } + + metadata_->WriteIndex(file_pos_, ci_offset, oi_offset, ci_len, oi_len); + + ci_offset += ci_len; + oi_offset += oi_len; + // Write metadata at end of column chunk + metadata_->WriteTo(sink_.get()); + } + + void WritePageBloomFilter( BlockSplitBloomFilter& bl_page_filter, int64_t& file_pos) { + sink_->Tell(&file_pos); + bl_page_filter.WriteTo(sink_.get()); + } + bool has_compressor() override { return (compressor_ != nullptr); } int64_t num_values() { return num_values_; } @@ -453,16 +574,45 @@ class SerializedPageWriter : public PageWriter { // Compression codec to use. std::unique_ptr<::arrow::util::Codec> compressor_; +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD std::string data_page_aad_; std::string data_page_header_aad_; std::shared_ptr meta_encryptor_; std::shared_ptr data_encryptor_; +======= + BlockSplitBloomFilter blf; +>>>>>>> 522c3f988... 
insert hash std::shared_ptr encryption_buffer_; std::map dict_encoding_stats_; std::map data_encoding_stats_; +======= + +<<<<<<< HEAD + // OffsetIndex stores the locations of the pages. + parquet::format::OffsetIndex offset_index_; + + // ColumnIndex stores the statistics of the pages. + parquet::format::ColumnIndex column_index_; + + // Memory consumption of the min/max values in the page index. + int64_t page_index_memory_consumption_ = 0; + + + /// In parquet::ColumnIndex we store the min and max values for each page. + /// However, we don't want to store very long strings, so we truncate them. + /// The value of it must not be too small, since we don't want to truncate + /// non-string values. + static const int PAGE_INDEX_MAX_STRING_LENGTH = 64; +>>>>>>> 34b755b70... column index +======= +>>>>>>> c0fbc925b... write index +======= +>>>>>>> 6225e423c... page level bloom filter }; // This implementation of the PageWriter writes to the final sink on Close . @@ -514,6 +664,18 @@ class BufferedPageWriter : public PageWriter { return pager_->WriteDataPage(page); } + int64_t WriteDataPagesWithIndex(const parquet::CompressedDataPage &page, format::PageLocation& ploc) override { + return pager_->WriteDataPagesWithIndex(page, ploc); + } + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) { + pager_->WriteIndex(file_pos_, ci_offset, oi_offset, ci, oi); + } + + void WritePageBloomFilter(BlockSplitBloomFilter& blf, int64_t& f_pos) { + //pager_->WritePageBloomFilter(blf, f_pos); + } + void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override { pager_->Compress(src_buffer, dest_buffer); } @@ -595,6 +757,19 @@ class ColumnWriterImpl { int64_t Close(); + int64_t CloseWithIndex(); + + void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset); + + void WriteBloomFilterOffset(int64_t& file_pos); + + void WritePageBloomFilter(BlockSplitBloomFilter& bl_page_filter) { + int64_t f_pos; + //pager_->WritePageBloomFilter(bl_page_filter, f_pos); + //AddBloomFilterOffsetToOffsetIndex(f_pos); + + } + protected: virtual std::shared_ptr GetValuesBuffer() = 0; @@ -614,18 +789,32 @@ class ColumnWriterImpl { // Serializes the Data Pages in other encoding modes void AddDataPage(); +<<<<<<< HEAD +<<<<<<< HEAD void BuildDataPageV1(int64_t definition_levels_rle_size, int64_t repetition_levels_rle_size, int64_t uncompressed_size, const std::shared_ptr& values); void BuildDataPageV2(int64_t definition_levels_rle_size, int64_t repetition_levels_rle_size, int64_t uncompressed_size, const std::shared_ptr& values); +======= + // Adds Data Pages to an in memory buffer in dictionary encoding mode + // Serializes the Data Pages in other encoding modes +======= + +>>>>>>> 04c793ab2... write index + void AddDataPageWithIndex(); +>>>>>>> 009899ff1... 
write data page

   // Serializes Data Pages
   void WriteDataPage(const DataPage& page) {
     total_bytes_written_ += pager_->WriteDataPage(page);
   }

+  void WriteDataPageWithIndex(const CompressedDataPage& page, format::PageLocation& ploc) {
+    total_bytes_written_ += pager_->WriteDataPagesWithIndex(page, ploc);
+  }
+
   // Write multiple definition levels
   void WriteDefinitionLevels(int64_t num_levels, const int16_t* levels) {
     DCHECK(!closed_);
@@ -647,6 +836,8 @@ class ColumnWriterImpl {
   // Serialize the buffered Data Pages
   void FlushBufferedDataPages();

+  void FlushBufferedDataPagesWithIndex();
+
   ColumnChunkMetaDataBuilder* metadata_;
   const ColumnDescriptor* descr_;
   // scratch buffer if validity bits need to be recalculated.
@@ -655,6 +846,8 @@ class ColumnWriterImpl {

   std::unique_ptr<PageWriter> pager_;

+  std::unique_ptr<ThriftSerializer> thrift_serializer_;
+
   bool has_dictionary_;
   Encoding::type encoding_;
   const WriterProperties* properties_;
@@ -701,12 +894,100 @@ class ColumnWriterImpl {

   std::vector<CompressedDataPage> data_pages_;

+  /// In parquet::ColumnIndex we store the min and max values for each page.
+  /// However, we don't want to store very long strings, so we truncate them.
+  /// The limit must not be too small, since we don't want to truncate
+  /// non-string values.
+  static const int PAGE_INDEX_MAX_STRING_LENGTH = 128;
+
+  ::arrow::Status AddMemoryConsumptionForPageIndex(int64_t new_memory_allocation) {
+    page_index_memory_consumption_ += new_memory_allocation;
+    return ::arrow::Status::OK();
+  }
+
+  ::arrow::Status ReserveOffsetIndex(int64_t capacity) {
+    PARQUET_THROW_NOT_OK(AddMemoryConsumptionForPageIndex(
+        capacity * sizeof(parquet::format::PageLocation) + sizeof(int64_t)));
+    offset_index_.page_locations.reserve(capacity);
+    offset_index_.page_bloom_filter_offsets.reserve(capacity);
+    return ::arrow::Status::OK();
+  }
+
+  void AddLocationToOffsetIndex(const parquet::format::PageLocation location) {
+    offset_index_.page_locations.push_back(location);
+  }
+
+  void AddBloomFilterOffsetToOffsetIndex(const int64_t page_blf_offset) {
+    offset_index_.page_bloom_filter_offsets.push_back(page_blf_offset);
+  }
+
+  ::arrow::Status TruncateDown(const std::string& min, int32_t max_length,
+                               std::string* result) {
+    *result = min.substr(0, std::min(static_cast<int32_t>(min.length()), max_length));
+    return Status::OK();
+  }
+
+  ::arrow::Status TruncateUp(const std::string& max, int32_t max_length,
+                             std::string* result) {
+    if (max.length() <= static_cast<uint32_t>(max_length)) {
+      // Short enough already; no truncation needed.
+      *result = max;
+      return Status::OK();
+    }
+
+    *result = max.substr(0, max_length);
+    int i = max_length - 1;
+    // Roll trailing 0xFF bytes over to 0x00, moving left until a byte can be
+    // incremented without overflowing.
+    while (i > 0 && static_cast<char>((*result)[i]) == -1) {
+      (*result)[i] += 1;
+      --i;
+    }
+    // We convert it to unsigned because signed overflow results in undefined behavior.
+    unsigned char uch = static_cast<unsigned char>((*result)[i]);
+    uch += 1;
+    (*result)[i] = uch;
+    if (i == 0 && (*result)[i] == 0) {
+      return Status(::arrow::StatusCode::CapacityError,
+                    "TruncateUp() couldn't increase string.");
+    }
+    result->resize(i + 1);
+    return Status::OK();
+  }
+
+  ::arrow::Status AddPageStatsToColumnIndex(const parquet::EncodedStatistics& page_stats) {
+    // If page_stats contains min_value and max_value, then append them to min_values_
+    // and max_values_ and also mark the page as not null. In case min and max values
+    // are not set, push empty strings to maintain the consistency of the index and
+    // mark the page as null. Always push the null_count.
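+    //
+    // Illustrative example (three pages; truncation elided) -- assumed input
+    // rather than part of this patch:
+    //   page 0: min "aaa", max "abz"  -> null_pages += false, min/max appended
+    //   page 1: all values null       -> null_pages += true,  ""/"" appended
+    //   page 2: min "b",   max "bz"   -> null_pages += false, min/max appended
+    // null_counts receives one entry per page in every case.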
+    std::string min_val;
+    std::string max_val;
+
+    if (page_stats.is_set()) {
+      Status s_min = TruncateDown(page_stats.min(), PAGE_INDEX_MAX_STRING_LENGTH, &min_val);
+      if (!s_min.ok()) {
+        return s_min;
+      }
+      Status s_max = TruncateDown(page_stats.max(), PAGE_INDEX_MAX_STRING_LENGTH, &max_val);
+      if (!s_max.ok()) {
+        return s_max;
+      }
+      column_index_.null_pages.push_back(false);
+    } else {
+      column_index_.null_pages.push_back(true);
+    }
+    PARQUET_THROW_NOT_OK(
+        AddMemoryConsumptionForPageIndex(min_val.capacity() + max_val.capacity()));
+    column_index_.min_values.emplace_back(std::move(min_val));
+    column_index_.max_values.emplace_back(std::move(max_val));
+    column_index_.null_counts.push_back(page_stats.null_count);
+    return Status::OK();
+  }
+
  private:
   void InitSinks() {
     definition_levels_sink_.Rewind(0);
     repetition_levels_sink_.Rewind(0);
   }

   // Concatenate the encoded levels and values into one buffer
   void ConcatenateBuffers(int64_t definition_levels_rle_size,
                           int64_t repetition_levels_rle_size,
@@ -717,6 +998,19 @@ class ColumnWriterImpl {
     combined += definition_levels_rle_size;
     memcpy(combined, values->data(), values->size());
   }

+  // OffsetIndex stores the locations of the pages.
+  parquet::format::OffsetIndex offset_index_;
+
+  // ColumnIndex stores the statistics of the pages.
+  parquet::format::ColumnIndex column_index_;
+
+  // Memory consumption of the min/max values in the page index.
+  int64_t page_index_memory_consumption_ = 0;
+
+  // Scratch location reused for each page that is written with an index.
+  parquet::format::PageLocation ploc;
+
 };

 // return the size of the encoded buffer
@@ -877,6 +1171,78 @@ void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size,
   }
 }

+void ColumnWriterImpl::AddDataPageWithIndex() {
+  int64_t definition_levels_rle_size = 0;
+  int64_t repetition_levels_rle_size = 0;
+
+  std::shared_ptr<Buffer> values = GetValuesBuffer();
+
+  if (descr_->max_definition_level() > 0) {
+    definition_levels_rle_size =
+        RleEncodeLevels(definition_levels_sink_.data(), definition_levels_rle_.get(),
+                        descr_->max_definition_level());
+  }
+
+  if (descr_->max_repetition_level() > 0) {
+    repetition_levels_rle_size =
+        RleEncodeLevels(repetition_levels_sink_.data(), repetition_levels_rle_.get(),
+                        descr_->max_repetition_level());
+  }
+
+  int64_t uncompressed_size =
+      definition_levels_rle_size + repetition_levels_rle_size + values->size();
+
+  // Resize with arrow::Buffer shrink_to_fit = false: the underlying buffer
+  // only keeps growing, so resizing to a smaller size does not reallocate.
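+  //
+  // After the Resize below, the page body is assembled back to back as
+  //
+  //   [ repetition levels (RLE) | definition levels (RLE) | encoded values ]
+  //
+  // which is the layout a v1 data page reader expects.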
+  PARQUET_THROW_NOT_OK(uncompressed_data_->Resize(uncompressed_size, false));
+
+  // Concatenate data into a single buffer
+  uint8_t* uncompressed_ptr = uncompressed_data_->mutable_data();
+  memcpy(uncompressed_ptr, repetition_levels_rle_->data(), repetition_levels_rle_size);
+  uncompressed_ptr += repetition_levels_rle_size;
+  memcpy(uncompressed_ptr, definition_levels_rle_->data(), definition_levels_rle_size);
+  uncompressed_ptr += definition_levels_rle_size;
+  memcpy(uncompressed_ptr, values->data(), values->size());
+
+  EncodedStatistics page_stats = GetPageStatistics();
+  page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
+  page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
+  AddPageStatsToColumnIndex(page_stats);
+  ResetPageStatistics();
+
+  std::shared_ptr<Buffer> compressed_data;
+  if (pager_->has_compressor()) {
+    pager_->Compress(*(uncompressed_data_.get()), compressed_data_.get());
+    compressed_data = compressed_data_;
+  } else {
+    compressed_data = uncompressed_data_;
+  }
+
+  // Write the page to the OutputStream eagerly if there is no dictionary or
+  // if dictionary encoding has fallen back to PLAIN
+  if (has_dictionary_ && !fallback_) {  // Save pages until end of dictionary encoding
+    std::shared_ptr<Buffer> compressed_data_copy;
+    PARQUET_THROW_NOT_OK(compressed_data->Copy(0, compressed_data->size(), allocator_,
+                                               &compressed_data_copy));
+    CompressedDataPage page(compressed_data_copy,
+                            static_cast<int32_t>(num_buffered_values_), encoding_,
+                            Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
+    total_compressed_bytes_ += page.size() + sizeof(format::PageHeader);
+    data_pages_.push_back(std::move(page));
+  } else {  // Eagerly write pages
+    CompressedDataPage page(compressed_data, static_cast<int32_t>(num_buffered_values_),
+                            encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size,
+                            page_stats);
+    WriteDataPageWithIndex(page, ploc);
+    AddLocationToOffsetIndex(ploc);
+  }
+
+  // Re-initialize the sinks for the next page.
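+  // (Pages saved on the dictionary branch above are flushed later by
+  // FlushBufferedDataPagesWithIndex(), which writes each one and records its
+  // PageLocation in the offset index.)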
+ InitSinks(); + num_buffered_values_ = 0; + num_buffered_encoded_values_ = 0; +} + int64_t ColumnWriterImpl::Close() { if (!closed_) { closed_ = true; @@ -901,6 +1267,38 @@ int64_t ColumnWriterImpl::Close() { return total_bytes_written_; } +int64_t ColumnWriterImpl::CloseWithIndex() { + if (!closed_) { + closed_ = true; + if (has_dictionary_ && !fallback_) { + WriteDictionaryPage(); + } + + FlushBufferedDataPagesWithIndex(); + + EncodedStatistics chunk_statistics = GetChunkStatistics(); + chunk_statistics.ApplyStatSizeLimits( + properties_->max_statistics_size(descr_->path())); + chunk_statistics.set_is_signed(SortOrder::SIGNED == descr_->sort_order()); + + if (rows_written_ > 0 && chunk_statistics.is_set()) { + metadata_->SetStatistics(chunk_statistics); + } + + pager_->Close(has_dictionary_, fallback_); + } + + return total_bytes_written_; +} + +void ColumnWriterImpl::WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset) { + pager_->WriteIndex(file_pos_, ci_offset, oi_offset, column_index_, offset_index_); +} + +void ColumnWriterImpl::WriteBloomFilterOffset(int64_t& file_pos_) { + metadata_->WriteBloomFilterOffset(file_pos_); +} + void ColumnWriterImpl::FlushBufferedDataPages() { // Write all outstanding data to a new page if (num_buffered_values_ > 0) { @@ -913,6 +1311,24 @@ void ColumnWriterImpl::FlushBufferedDataPages() { total_compressed_bytes_ = 0; } +void ColumnWriterImpl::FlushBufferedDataPagesWithIndex() { + + if (num_buffered_values_ > 0) { + AddDataPageWithIndex(); + } + + PARQUET_THROW_NOT_OK(ReserveOffsetIndex(data_pages_.size())); + + for (size_t i = 0; i < data_pages_.size(); i++) { + // AddPageStatsToColumnIndex(data_pages_[i].statistics()); + WriteDataPageWithIndex(data_pages_[i],ploc); + AddLocationToOffsetIndex(ploc); + } + + data_pages_.clear(); + total_compressed_bytes_ = 0; +} + // ---------------------------------------------------------------------- // TypedColumnWriter @@ -982,6 +1398,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< int64_t Close() override { return ColumnWriterImpl::Close(); } +<<<<<<< Updated upstream +======= +<<<<<<< HEAD +>>>>>>> Stashed changes int64_t WriteBatch(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const T* values) override { // We check for DataPage limits only after we have inserted the values. If a user @@ -1009,8 +1429,28 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< }; DoInBatches(num_values, properties_->write_batch_size(), WriteChunk); return value_offset; +<<<<<<< Updated upstream +======= +======= + int64_t CloseWithIndex() override { + int64_t total_bytes_written = ColumnWriterImpl::CloseWithIndex(); + //WritePageBloomFilter(); + return total_bytes_written; + } + + void WriteIndex(int64_t file_pos_, int64_t ci_offset, int64_t oi_offset) override { + return ColumnWriterImpl::WriteIndex(file_pos_, ci_offset, oi_offset); +>>>>>>> c0fbc925b... 
write index +>>>>>>> Stashed changes } + void WriteBloomFilterOffset(int64_t& file_pos) override { + ColumnWriterImpl::WriteBloomFilterOffset(file_pos); + } + + void WriteBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index) override; + void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) override { @@ -1106,6 +1546,19 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< total_bytes_written_ += pager_->WriteDictionaryPage(page); } +<<<<<<< HEAD +======= + // Checks if the Dictionary Page size limit is reached + // If the limit is reached, the Dictionary and Data Pages are serialized + // The encoding is switched to PLAIN + void CheckDictionarySizeLimit(bool with_index); + +<<<<<<< HEAD + void CheckDictionarySizeLimitWithIndex(); + +>>>>>>> e07017907... writer;generic reader +======= +>>>>>>> c0fbc925b... write index EncodedStatistics GetPageStatistics() override { EncodedStatistics result; if (page_statistics_) result = page_statistics_->Encode(); @@ -1144,6 +1597,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< std::shared_ptr page_statistics_; std::shared_ptr chunk_statistics_; +<<<<<<< HEAD +<<<<<<< HEAD // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the // dictionary passed to DictEncoder::PutDictionary so we can check // subsequent array chunks to see either if materialization is required (in @@ -1160,6 +1615,97 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< ++values_to_write; } } +======= +======= + std::vector blf; + int num_rows_for_bf; + const double fpp = 0.001; + +>>>>>>> 6225e423c... 
page level bloom filter + inline int64_t WriteMiniBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index); + + inline int64_t WriteMiniBatchSpaced(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, + const uint8_t* valid_bits, + int64_t valid_bits_offset, const T* values, + int64_t* num_spaced_written); + + // Write values to a temporary buffer before they are encoded into pages + void WriteValues(int64_t num_values, const T* values) { + dynamic_cast(current_encoder_.get()) + ->Put(values, static_cast(num_values)); + } + + void WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, + int64_t valid_bits_offset, const T* values) { + dynamic_cast(current_encoder_.get()) + ->PutSpaced(values, static_cast(num_values), valid_bits, valid_bits_offset); + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const int32_t* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const int64_t* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const float* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash((float)(int64_t)values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const double* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash((double)(int64_t)values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const ByteArray* values) { + for ( uint32_t i=0; i < num_values; i++){ + blf[blf.size()-1].InsertHash(blf[blf.size()-1].Hash(&values[i])); + } + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const Int96* values) { + + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const FLBA* values) { + + } + + void AppendValuesToPageBloomFilter(int64_t num_values, const bool* values) { + + } + + void WritePageBloomFilter() { + for (uint32_t i=0; i < blf.size(); i++) + ColumnWriterImpl::WritePageBloomFilter(blf[i]); + } + + void InitializeBloomF() { + if (blf.size() == 0) { + BlockSplitBloomFilter bf; + bf.Init(BlockSplitBloomFilter::OptimalNumOfBits(properties_->write_batch_size() << 8,fpp)); + blf.push_back(std::move(bf)); + } + } + + void NewPageBloomFilter() { + BlockSplitBloomFilter bf; + bf.Init(BlockSplitBloomFilter::OptimalNumOfBits(properties_->write_batch_size() << 8,fpp)); + blf.push_back(std::move(bf)); + } + +}; +>>>>>>> e07017907... writer;generic reader WriteDefinitionLevels(num_values, def_levels); } else { @@ -1167,6 +1713,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< values_to_write = num_values; } +<<<<<<< HEAD +<<<<<<< HEAD // Not present for non-repeated fields if (descr_->max_repetition_level() > 0) { // A row could include more than one value @@ -1176,7 +1724,47 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< rows_written_++; } } +======= +template +<<<<<<< HEAD +void TypedColumnWriterImpl::CheckDictionarySizeLimitWithIndex() { +======= +void TypedColumnWriterImpl::CheckDictionarySizeLimit(bool with_index) { +>>>>>>> d78a37051... 
write index + // We have to dynamic cast here because TypedEncoder as some compilers + // don't want to cast through virtual inheritance + auto dict_encoder = dynamic_cast*>(current_encoder_.get()); + //if (dict_encoder->dict_encoded_size() >= properties_->dictionary_pagesize_limit()) { + WriteDictionaryPage(); + // Serialize the buffered Dictionary Indicies +<<<<<<< HEAD + FlushBufferedDataPagesWithIndex(); +======= + if (!with_index) + FlushBufferedDataPages(); + else{ + NewPageBloomFilter(); + FlushBufferedDataPagesWithIndex(); +<<<<<<< HEAD +>>>>>>> d78a37051... write index +======= + } +>>>>>>> b4a66445c... page write bloom + fallback_ = true; + // Only PLAIN encoding is supported for fallback in V1 + current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_, + properties_->memory_pool()); + encoding_ = Encoding::PLAIN; + //} +} + +======= +>>>>>>> c0fbc925b... write index +// ---------------------------------------------------------------------- +// Instantiate templated classes +>>>>>>> e07017907... writer;generic reader +<<<<<<< HEAD WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row @@ -1201,6 +1789,23 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< def_levels[x] >= level_info_.repeated_ancestor_def_level ? 1 : 0; } *null_count = *out_values_to_write - *out_spaced_values_to_write; +<<<<<<< Updated upstream +======= +======= +template +int64_t TypedColumnWriterImpl::WriteMiniBatch(int64_t num_values, + const int16_t* def_levels, + const int16_t* rep_levels, + const T* values, + bool with_index) { + int64_t values_to_write = 0; + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0) { + for (int64_t i = 0; i < num_values; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + ++values_to_write; +>>>>>>> fa45c95a5... write index +>>>>>>> Stashed changes } return; } @@ -1254,17 +1859,58 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Each value is exactly one row rows_written_ += static_cast(num_levels); } +<<<<<<< Updated upstream +======= +<<<<<<< HEAD +======= + + WriteRepetitionLevels(num_values, rep_levels); + } else { + // Each value is exactly one row + rows_written_ += static_cast(num_values); + } + + // PARQUET-780 + if (values_to_write > 0) { + DCHECK(nullptr != values) << "Values ptr cannot be NULL"; + } + + WriteValues(values_to_write, values); + //AppendValuesToPageBloomFilter(values_to_write,values); + + if (page_statistics_ != nullptr) { + page_statistics_->Update(values, values_to_write, num_values - values_to_write); +>>>>>>> 6225e423c... page level bloom filter +>>>>>>> Stashed changes } void CommitWriteAndCheckPageLimit(int64_t num_levels, int64_t num_values) { num_buffered_values_ += num_levels; num_buffered_encoded_values_ += num_values; +<<<<<<< HEAD if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { AddDataPage(); } +======= + if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { + //NewPageBloomFilter(); + + if (!with_index) + AddDataPage(); + else + AddDataPageWithIndex(); + } + if (has_dictionary_ && !fallback_) { +<<<<<<< HEAD + CheckDictionarySizeLimit(); +>>>>>>> fa45c95a5... write index +======= + CheckDictionarySizeLimit(with_index); +>>>>>>> d78a37051... 
write index } +<<<<<<< HEAD void FallbackToPlainEncoding() { if (IsDictionaryEncoding(current_encoder_->encoding())) { WriteDictionaryPage(); @@ -1275,6 +1921,32 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_, properties_->memory_pool()); encoding_ = Encoding::PLAIN; +======= + return values_to_write; +} + +template +int64_t TypedColumnWriterImpl::WriteMiniBatchSpaced( + int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, + const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values, + int64_t* num_spaced_written) { + int64_t values_to_write = 0; + int64_t spaced_values_to_write = 0; + // If the field is required and non-repeated, there are no definition levels + if (descr_->max_definition_level() > 0) { + // Minimal definition level for which spaced values are written + int16_t min_spaced_def_level = descr_->max_definition_level(); + if (descr_->schema_node()->is_optional()) { + min_spaced_def_level--; + } + for (int64_t i = 0; i < num_levels; ++i) { + if (def_levels[i] == descr_->max_definition_level()) { + ++values_to_write; + } + if (def_levels[i] >= min_spaced_def_level) { + ++spaced_values_to_write; + } +>>>>>>> e07017907... writer;generic reader } } @@ -1352,6 +2024,7 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( maybe_parent_nulls); }; +<<<<<<< HEAD if (!IsDictionaryEncoding(current_encoder_->encoding()) || !DictionaryDirectWriteSupported(array)) { // No longer dictionary-encoding for whatever reason, maybe we never were @@ -1361,6 +2034,13 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( // dictionary page limit is reached, at which everything (dictionary and // dense) will fall back to plain encoding return WriteDense(); +======= + if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) { + AddDataPage(); + } + if (has_dictionary_ && !fallback_) { + CheckDictionarySizeLimit(false); +>>>>>>> d78a37051... write index } auto dict_encoder = dynamic_cast*>(current_encoder_.get()); @@ -1368,7 +2048,28 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( std::shared_ptr<::arrow::Array> dictionary = data.dictionary(); std::shared_ptr<::arrow::Array> indices = data.indices(); +<<<<<<< HEAD +======= +template +void TypedColumnWriterImpl::WriteBatch(int64_t num_values, + const int16_t* def_levels, + const int16_t* rep_levels, + const T* values, + bool with_index) { + // We check for DataPage limits only after we have inserted the values. If a user + // writes a large number of values, the DataPage size can be much above the limit. + // The purpose of this chunking is to bound this. Even if a user writes large number + // of values, the chunking will ensure the AddDataPage() is called at a reasonable + // pagesize limit + int64_t write_batch_size = properties_->write_batch_size(); + + //InitializeBloomF(); + + int num_batches = static_cast(num_values / write_batch_size); + int64_t num_remaining = num_values % write_batch_size; +>>>>>>> e07017907... 
writer;generic reader int64_t value_offset = 0; +<<<<<<< HEAD auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) { int64_t batch_num_values = 0; int64_t batch_num_spaced_values = 0; @@ -1387,7 +2088,21 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( CommitWriteAndCheckPageLimit(batch_size, batch_num_values); value_offset += batch_num_spaced_values; }; +======= + for (int round = 0; round < num_batches; round++) { + int64_t offset = round * write_batch_size; + int64_t num_values = WriteMiniBatch(write_batch_size, &def_levels[offset], + &rep_levels[offset], &values[value_offset], with_index); + value_offset += num_values; + } + // Write the remaining values + int64_t offset = num_batches * write_batch_size; + WriteMiniBatch(num_remaining, &def_levels[offset], &rep_levels[offset], + &values[value_offset], with_index); +} +>>>>>>> fa45c95a5... write index +<<<<<<< HEAD // Handle seeing dictionary for the first time if (!preserved_dictionary_) { // It's a new dictionary. Call PutDictionary and keep track of it @@ -1403,6 +2118,28 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( // Dictionary has changed PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding()); return WriteDense(); +======= +template +void TypedColumnWriterImpl::WriteBatchSpaced( + int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, + const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) { + // We check for DataPage limits only after we have inserted the values. If a user + // writes a large number of values, the DataPage size can be much above the limit. + // The purpose of this chunking is to bound this. Even if a user writes large number + // of values, the chunking will ensure the AddDataPage() is called at a reasonable + // pagesize limit + int64_t write_batch_size = properties_->write_batch_size(); + int num_batches = static_cast(num_values / write_batch_size); + int64_t num_remaining = num_values % write_batch_size; + int64_t num_spaced_written = 0; + int64_t values_offset = 0; + for (int round = 0; round < num_batches; round++) { + int64_t offset = round * write_batch_size; + WriteMiniBatchSpaced(write_batch_size, &def_levels[offset], &rep_levels[offset], + valid_bits, valid_bits_offset + values_offset, + values + values_offset, &num_spaced_written); + values_offset += num_spaced_written; +>>>>>>> 522c3f988... insert hash } PARQUET_CATCH_NOT_OK( @@ -1519,8 +2256,16 @@ Status TypedColumnWriterImpl::WriteArrowDense( if (array.type_id() != ::arrow::Type::BOOL) { ARROW_UNSUPPORTED(); } +<<<<<<< HEAD return WriteArrowSerialize( array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls); +<<<<<<< Updated upstream +======= +======= + PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, buffer,false)); + return Status::OK(); +>>>>>>> fa45c95a5... 
write index +>>>>>>> Stashed changes } // ---------------------------------------------------------------------- @@ -1559,7 +2304,7 @@ Status TypedColumnWriterImpl::WriteArrowDense( const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) { switch (array.type()->id()) { case ::arrow::Type::NA: { - PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr)); + PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr,false)); } break; WRITE_SERIALIZE_CASE(INT8, Int8Type, Int32Type) WRITE_SERIALIZE_CASE(UINT8, UInt8Type, Int32Type) diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h index 57f98533a72..411215aeda5 100644 --- a/cpp/src/parquet/column_writer.h +++ b/cpp/src/parquet/column_writer.h @@ -24,6 +24,8 @@ #include "parquet/exception.h" #include "parquet/platform.h" #include "parquet/types.h" +#include "parquet/parquet_types.h" +#include "parquet/bloom_filter.h" namespace arrow { @@ -99,11 +101,19 @@ class PARQUET_EXPORT PageWriter { virtual int64_t WriteDataPage(const DataPage& page) = 0; + virtual int64_t WriteDataPagesWithIndex(const CompressedDataPage& data_page, format::PageLocation& ploc) = 0; + + virtual void WritePageBloomFilter(BlockSplitBloomFilter& bl_page_filter, int64_t& file_pos) = 0; + + virtual void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, format::ColumnIndex& ci, format::OffsetIndex& oi) = 0; + virtual int64_t WriteDictionaryPage(const DictionaryPage& page) = 0; virtual bool has_compressor() = 0; virtual void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) = 0; + + int64_t current_page_row_set_index; }; static constexpr int WRITE_BATCH_SIZE = 1000; @@ -119,6 +129,12 @@ class PARQUET_EXPORT ColumnWriter { /// \return Total size of the column in bytes virtual int64_t Close() = 0; + virtual int64_t CloseWithIndex() = 0; + + virtual void WriteIndex(int64_t file_pos_, int64_t ci_offset, int64_t oi_offset) = 0; + + virtual void WriteBloomFilterOffset(int64_t& file_pos) = 0; + /// \brief The physical Parquet type of the column virtual Type::type type() const = 0; @@ -170,7 +186,13 @@ class TypedColumnWriter : public ColumnWriter { // It can be smaller than `num_values` is there are some undefined values. virtual int64_t WriteBatch(int64_t num_values, const int16_t* def_levels, const int16_t* rep_levels, const T* values) = 0; - + virtual void WriteBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values) = 0; + + // Write a batch of repetition levels, definition levels, and values to the + // column. + virtual void WriteBatchWithIndex(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values, bool with_index = false) = 0; /// Write a batch of repetition levels, definition levels, and values to the /// column. /// @@ -203,6 +225,7 @@ class TypedColumnWriter : public ColumnWriter { // Estimated size of the values that are not written to a page yet virtual int64_t EstimatedBufferedValueBytes() const = 0; + }; using BoolWriter = TypedColumnWriter; diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 67f211b29c7..8cf0ae38504 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -42,6 +42,11 @@ #include "parquet/properties.h" #include "parquet/schema.h" #include "parquet/types.h" +<<<<<<< HEAD +======= +#include +#include +>>>>>>> cc47998e9... 
high precision time namespace parquet { @@ -49,6 +54,9 @@ namespace parquet { static constexpr int64_t kDefaultFooterReadSize = 64 * 1024; static constexpr uint32_t kFooterSize = 8; +static constexpr uint32_t kColumnIndexReadSize = 16*1024; +static constexpr uint32_t kOffsetIndexReadSize = 16*1024; + // For PARQUET-816 static constexpr int64_t kMaxDictHeaderSize = 100; @@ -83,9 +91,47 @@ std::unique_ptr RowGroupReader::GetColumnPageReader(int i) { return contents_->GetColumnPageReader(i); } + +std::unique_ptr RowGroupReader::GetColumnPageReaderWithIndex(int i,void* predicate, int64_t& min_index, + int predicate_col, int64_t& row_index,Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index, + parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index, BlockSplitBloomFilter& blf, + bool& first_time_blf,bool& first_time_index, + float& blf_load_time, float& index_load_time) { + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; + return contents_->GetColumnPageReaderWithIndex(i,predicate, min_index, predicate_col, row_index,type_num, with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_min_index, unsorted_row_index, col_index, offset_index, blf, first_time_blf,first_time_index, + blf_load_time, index_load_time); +} + +std::shared_ptr RowGroupReader::ColumnWithIndex(int i,void* predicate, int64_t& min_index, int predicate_col, + int64_t& row_index,Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned, + int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf, + std::vector& unsorted_min_index, std::vector& unsorted_row_index) { + DCHECK(i < metadata()->num_columns()) + << "The RowGroup only has " << metadata()->num_columns() + << "columns, requested column: " << i; + const ColumnDescriptor* descr = metadata()->schema()->Column(i); + + std::unique_ptr page_reader = contents_->GetColumnPageReaderWithIndex(i,predicate, min_index, predicate_col, row_index,type_num, with_index, binary_search, count_pages_scanned, + total_num_pages, last_first_row, with_bloom_filter, with_page_bf, + unsorted_min_index, unsorted_row_index, col_index,offset_index,blf,first_time_blf,first_time_index, + blf_load_time, index_load_time); + return ColumnReader::Make( + descr, std::move(page_reader), + const_cast(contents_->properties())->memory_pool()); +} + // Returns the rowgroup metadata const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); } +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD /// Compute the section of the file that should be read for the given /// row group and column chunk. arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata, @@ -115,7 +161,15 @@ arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata, return {col_start, col_length}; } - +======= +uint64_t page_offset=0,num_values=0,next_page_offset=0; +>>>>>>> 10d3ed008... setup for file offset +======= +uint64_t page_offset,num_values,next_page_offset; +>>>>>>> c0a9bb12f... default setup + +======= +>>>>>>> c0ee60adc... 
generic reader // RowGroupReader::Contents implementation for the Parquet file specification class SerializedRowGroup : public RowGroupReader::Contents { public: @@ -138,6 +192,12 @@ class SerializedRowGroup : public RowGroupReader::Contents { const ReaderProperties* properties() const override { return &properties_; } +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD std::unique_ptr GetColumnPageReader(int i) override { // Read column chunk from the file auto col = row_group_metadata_->ColumnChunk(i); @@ -153,7 +213,1053 @@ class SerializedRowGroup : public RowGroupReader::Contents { } else { stream = properties_.GetStream(source_, col_range.offset, col_range.length); } + void SkipPages(long int v) const { + + void GoToPage(int64_t v, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index, uint64_t& page_offset,uint64_t& num_values,uint64_t& next_page_offset) const { +======= + void GoToPage(int64_t v, int64_t default_start, int64_t default_next_page_offset, int64_t default_num_values,parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index, uint64_t& page_offset,uint64_t& num_values,uint64_t& next_page_offset) const { +>>>>>>> c0a9bb12f... default setup + std::vector::size_type itemindex = 0; +======= + + void GetRowRangeForPage(uint64_t& row_group_index, parquet::format::OffsetIndex offset_index, uint64_t page_idx, uint64_t& row_range_start, uint64_t& row_range_end) { + const auto& page_locations = offset_index.page_locations; + DCHECK(page_idx < page_locations.size()) << "The page start index " << page_idx << " is greater than last page" << page_locations.size(); + row_range_start = page_locations[page_idx].first_row_index; + if (page_idx == page_locations.size() - 1) { + row_range_end = row_range_end - row_range_start - 1; + } else { + row_range_end = page_locations[page_idx + 1].first_row_index - 1; + } + } + +======= +>>>>>>> c0ee60adc... generic reader + void GetPageIndex(int64_t v, int64_t& min_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index) const { +<<<<<<< HEAD +// std::vector::size_type itemindex = 0; +>>>>>>> b2788ebb5... added one page check + //std::vector min_vec = std::vector>(col_index.min_values.begin(), col_index.min_values.end()); + int64_t min_diff = std::numeric_limits::max();//std::lower_bound(min_vec.begin(),min_vec.end(),v); +======= +>>>>>>> e33dd0dac... changed binary search + + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int64_t* page_min = (int64_t*)(void *)col_index.min_values[itemindex].c_str(); + int64_t* page_max = (int64_t*)(void *)col_index.max_values[itemindex].c_str(); + int64_t max_diff = *page_max - *page_min; +======= + void GetPageIndex(void* predicate, int64_t& min_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,Type::type type_num) const { +======= + void GetPageIndex(void* predicate, int64_t& min_index,int64_t& row_index, parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,Type::type type_num) const { +<<<<<<< HEAD + int64_t min_diff = std::numeric_limits::max(); +>>>>>>> 03bef468a... fixed bsearch;order by parquet file;skip pages +======= +>>>>>>> 20660f098... 
experiments +======= + bool isSorted(parquet::format::ColumnIndex col_index,parquet::format::OffsetIndex offset_index,Type::type type_num) const { + bool sorted = false; + switch(type_num) { + case Type::BOOLEAN:{ + + break; + } + case Type::INT32:{ + int32_t* page_min_prev = (int32_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + int32_t* page_min_curr = (int32_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::INT64:{ + int64_t* page_min_prev = (int64_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + int64_t* page_min_curr = (int64_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::INT96:{ + uint32_t* page_min_prev = (uint32_t*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + uint32_t* page_min_curr = (uint32_t*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::FLOAT:{ + float* page_min_prev = (float*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + float* page_min_curr = (float*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::DOUBLE:{ + double* page_min_prev = (double*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + double* page_min_curr = (double*)(void*)col_index.min_values[itemindex].c_str(); + if ( *page_min_prev <= *page_min_curr ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::BYTE_ARRAY:{ + char* page_min_prev = (char*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + char* page_min_curr = (char*)(void*)col_index.min_values[itemindex].c_str(); + if ( strcmp(page_min_prev,page_min_curr) <= 0 ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + char* page_min_prev = (char*)(void*)col_index.min_values[0].c_str(); + for (uint64_t itemindex = 1;itemindex < offset_index.page_locations.size();) { + char* page_min_curr = (char*)(void*)col_index.min_values[itemindex].c_str(); + if ( strcmp(page_min_prev,page_min_curr) <= 0 ){ + itemindex++; + page_min_prev = page_min_curr; + }else{ + return sorted; + } + } + sorted = true; + break; + } + default:{ + break; + } + } + return sorted; + } + + void page_bloom_filter_has_value(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, format::OffsetIndex& offset_index + , int64_t& min_index, Type::type type_num, int64_t& row_index) const { + int64_t blf_offset = offset_index.page_bloom_filter_offsets[min_index]; + std::shared_ptr stream_ = 
properties_.GetStream(source_, blf_offset,BloomFilter::kMaximumBloomFilterBytes); + BlockSplitBloomFilter page_blf = BlockSplitBloomFilter::Deserialize(stream_.get()); + row_index = offset_index.page_locations[min_index].first_row_index; + switch(type_num) { + case Type::BOOLEAN:{ + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) row_index = -1; + break; + } + case Type::INT64:{ + int64_t v = *((int64_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) row_index = -1; + break; + } + case Type::INT96:{ + uint32_t v = *((uint32_t*) predicate); + break; + } + case Type::FLOAT:{ + float v = *((float*) predicate); + if (!page_blf.FindHash(page_blf.Hash((float)(int64_t)v))) row_index = -1; + break; + } + case Type::DOUBLE:{ + double v = *((double*) predicate); + if (!page_blf.FindHash(page_blf.Hash((double)(int64_t)v))) row_index = -1; + break; + } + case Type::BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) row_index = -1; + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + char* v = (char*) predicate; + uint8_t ptr = *v; + ByteArray pba((uint32_t)strlen(v),&ptr); + if (!page_blf.FindHash(page_blf.Hash(&pba))) row_index = -1; + break; + } + default:{ + parquet::ParquetException::NYI("type reader not implemented"); + } + } + } + + + void page_bloom_filter_has_value(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, format::OffsetIndex& offset_index + , std::vector& unsorted_min_index, Type::type type_num, std::vector& unsorted_row_index) const { + + for ( int64_t min_index: unsorted_min_index) { + int64_t blf_offset = offset_index.page_bloom_filter_offsets[min_index]; + std::shared_ptr stream_ = properties_.GetStream(source_, blf_offset,BloomFilter::kMaximumBloomFilterBytes); + BlockSplitBloomFilter page_blf = BlockSplitBloomFilter::Deserialize(stream_.get()); + unsorted_row_index.push_back(offset_index.page_locations[min_index].first_row_index); + switch(type_num) { + case Type::BOOLEAN:{ + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) unsorted_row_index.pop_back(); + break; + } + case Type::INT64:{ + int64_t v = *((int64_t*) predicate); + if (!page_blf.FindHash(page_blf.Hash(v))) unsorted_row_index.pop_back(); + break; + } + case Type::INT96:{ + uint32_t v = *((uint32_t*) predicate); + break; + } + case Type::FLOAT:{ + float v = *((float*) predicate); + if (!page_blf.FindHash(page_blf.Hash((float)(int64_t)v))) unsorted_row_index.pop_back(); + break; + } + case Type::DOUBLE:{ + double v = *((double*) predicate); + if (!page_blf.FindHash(page_blf.Hash((double)(int64_t)v))) unsorted_row_index.pop_back(); + break; + } + case Type::BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) 
unsorted_row_index.pop_back(); + break; + } + case Type::FIXED_LEN_BYTE_ARRAY:{ + const char* p = (char*) predicate; + char dest[FIXED_LENGTH]; + for ( uint32_t i = 0; i < (FIXED_LENGTH-strlen(p));i++) dest[i] = '0'; + for ( uint32_t i = (FIXED_LENGTH-strlen(p)); i < FIXED_LENGTH;i++) dest[i] = p[i-(FIXED_LENGTH-strlen(p))]; + dest[FIXED_LENGTH] = '\0'; + std::string test(dest); + ByteArray pba(test.size(),reinterpret_cast(test.c_str())); + if (!page_blf.FindHash(page_blf.Hash(&pba))) unsorted_row_index.pop_back(); + break; + } + default:{ + parquet::ParquetException::NYI("type reader not implemented"); + } + } + } + } + + void GetPageIndex(std::shared_ptr& source_, ReaderProperties& properties_, void* predicate, + int64_t& min_index,int64_t& row_index, parquet::format::ColumnIndex col_index, + parquet::format::OffsetIndex offset_index,Type::type type_num, bool sorted, + bool with_binarysearch, int64_t& count_pages_scanned, +<<<<<<< HEAD + parquet::BlockSplitBloomFilter& blf, bool with_bloom_filter, bool with_page_bf) const { +<<<<<<< HEAD + bool sorted = isSorted(col_index,offset_index,type_num); +>>>>>>> 77931bb15... use binary search +======= +======= + bool with_bloom_filter, bool with_page_bf) const { +>>>>>>> dcf50b2dd... PARQUET-1327-bloom-filter-read-write-implementation-separate-calls + +>>>>>>> 08c315bf2... unsorted rows + switch(type_num) { + case Type::BOOLEAN:{ + // doesn't make sense for bool + break; + } + case Type::INT32:{ + int32_t v = *((int32_t*) predicate); + + + + if(sorted && with_binarysearch){ + if(col_index.min_values.size() >= 2){ + uint64_t last_index = col_index.min_values.size()-1; + uint64_t begin_index = 0; + uint64_t itemindex = (begin_index + last_index)/2; + + while(begin_index <= last_index) { + itemindex = (begin_index + last_index)/2; + int32_t* page_min_curr = (int32_t*)col_index.min_values[itemindex].c_str(); + + if ( v < *page_min_curr ){ + last_index -= 1; + count_pages_scanned++; + continue; + } + if ( itemindex < last_index ){ + int32_t* page_min_next = (int32_t*)col_index.min_values[itemindex+1].c_str(); + if ( v > *page_min_next ){ + begin_index += 1; + count_pages_scanned++; + } + if ( v < *page_min_next && v > *page_min_curr ){ + begin_index = last_index + 1; + count_pages_scanned++; + } + }else { + begin_index = last_index + 1; + count_pages_scanned++; + } + } + min_index = itemindex; + } + else + { + min_index = 0; + } + } + else{ + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int32_t* page_min = (int32_t*)(void *)col_index.min_values[itemindex].c_str(); + int32_t* page_max = (int32_t*)(void *)col_index.max_values[itemindex].c_str(); + int32_t max_diff = *page_max - *page_min; + + if ( *page_min <= v && v <= *page_max ) { + min_index = itemindex; + } + count_pages_scanned = itemindex; + } + min_index = (count_pages_scanned == ((int)offset_index.page_locations.size()-1) && min_index == -1)? count_pages_scanned:min_index; + } + break; + } + case Type::INT64: + { + int64_t v = *((int64_t*) predicate); +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD + + for (uint64_t itemindex = 0;itemindex < offset_index.page_locations.size();itemindex++) { + int64_t* page_min = (int64_t*)(void *)col_index.min_values[itemindex].c_str(); + int64_t* page_max = (int64_t*)(void *)col_index.max_values[itemindex].c_str(); +<<<<<<< HEAD +<<<<<<< HEAD + int64_t max_diff = *page_max - *page_min; +>>>>>>> 6eef203fa... generic predicate +======= + int64_t diff = *page_max - v; +>>>>>>> 03bef468a... 
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            // Binary search for the last page whose minimum is <= v.
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const int64_t* page_min_curr = (const int64_t*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const int64_t* page_min_next = (const int64_t*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;  // *page_min_curr <= v and v is below the next page's minimum
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const int64_t* page_min = (const int64_t*)col_index.min_values[itemindex].c_str();
+            const int64_t* page_max = (const int64_t*)col_index.max_values[itemindex].c_str();
+            if (*page_min <= v && v <= *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::INT96: {
+        // Note: only the first four bytes of an INT96 value take part in the comparison.
+        uint32_t v = *((uint32_t*)predicate);
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const uint32_t* page_min_curr = (const uint32_t*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const uint32_t* page_min_next = (const uint32_t*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const uint32_t* page_min = (const uint32_t*)col_index.min_values[itemindex].c_str();
+            const uint32_t* page_max = (const uint32_t*)col_index.max_values[itemindex].c_str();
+            if (*page_min <= v && v <= *page_max) {
+              min_index = itemindex;
+              count_pages_scanned = itemindex;
+            }
+          }
+        }
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const float* page_min_curr = (const float*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const float* page_min_next = (const float*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const float* page_min = (const float*)col_index.min_values[itemindex].c_str();
+            const float* page_max = (const float*)col_index.max_values[itemindex].c_str();
+            if (*page_min < v && v < *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const double* page_min_curr = (const double*)col_index.min_values[itemindex].c_str();
+              count_pages_scanned++;
+              if (v < *page_min_curr) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const double* page_min_next = (const double*)col_index.min_values[itemindex + 1].c_str();
+                if (v >= *page_min_next) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const double* page_min = (const double*)col_index.min_values[itemindex].c_str();
+            const double* page_max = (const double*)col_index.max_values[itemindex].c_str();
+            if (*page_min < v && v < *page_max) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        char* p = (char*)predicate;
+        // Remove leading zeroes in the predicate, if present, then left-pad it
+        // with '0' to FIXED_LENGTH so it compares lexicographically against the
+        // zero-padded page statistics.
+        int checkzero = 0;
+        while (p[checkzero] == '0') checkzero++;
+        p = p + checkzero;
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const std::string& page_min_curr = col_index.min_values[itemindex];
+              count_pages_scanned++;
+              if (test.compare(page_min_curr) < 0) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const std::string& page_min_next = col_index.min_values[itemindex + 1];
+                if (test.compare(page_min_next) >= 0) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const std::string& page_min = col_index.min_values[itemindex];
+            const std::string& page_max = col_index.max_values[itemindex];
+            if (test.compare(page_min) > 0 && test.compare(page_max) < 0) {
+              min_index = itemindex;
+            }
+            count_pages_scanned = itemindex;
+          }
+          min_index = (count_pages_scanned == ((int)offset_index.page_locations.size() - 1) && min_index == -1)
+                          ? count_pages_scanned : min_index;
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        char* v = (char*)predicate;
+        std::string str(v);
+
+        if (sorted && with_binarysearch) {
+          if (col_index.min_values.size() >= 2) {
+            int64_t last_index = col_index.min_values.size() - 1;
+            int64_t begin_index = 0;
+            int64_t itemindex = (begin_index + last_index) / 2;
+
+            while (begin_index <= last_index) {
+              itemindex = (begin_index + last_index) / 2;
+              const std::string& page_min_curr = col_index.min_values[itemindex];
+              count_pages_scanned++;
+              if (str.compare(page_min_curr) < 0) {
+                last_index = itemindex - 1;
+                continue;
+              }
+              if (itemindex < (int64_t)col_index.min_values.size() - 1) {
+                const std::string& page_min_next = col_index.min_values[itemindex + 1];
+                if (str.compare(page_min_next) >= 0) {
+                  begin_index = itemindex + 1;
+                  continue;
+                }
+              }
+              break;
+            }
+            min_index = itemindex;
+          } else {
+            min_index = 0;
+          }
+        } else {
+          for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+            const std::string& page_min = col_index.min_values[itemindex];
+            const std::string& page_max = col_index.max_values[itemindex];
+            if (str.compare(page_min) > 0 && str.compare(page_max) < 0) {
+              min_index = itemindex;
+              count_pages_scanned = itemindex;
+            }
+          }
+        }
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    if (with_page_bf)
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index, min_index, type_num, row_index);
+    else if (min_index >= 0)
+      row_index = offset_index.page_locations[min_index].first_row_index;
+    else
+      row_index = -1;  // no candidate page
+  }
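Taken together, the sorted path above is a lower-bound search over the column index minimums. A minimal standalone sketch of the same idea, assuming the page minimums have already been decoded into native integers (the example above instead reinterprets the raw thrift strings in place):

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical helper: find the last page whose minimum is <= v, or -1 if
// v precedes every page minimum.
int64_t FindCandidatePage(const std::vector<int64_t>& page_minimums, int64_t v) {
  auto it = std::upper_bound(page_minimums.begin(), page_minimums.end(), v);
  if (it == page_minimums.begin()) return -1;  // v is smaller than every page minimum
  return static_cast<int64_t>(std::distance(page_minimums.begin(), it)) - 1;
}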
+  void GetPageIndex(std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_, void* predicate,
+                    std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+                    parquet::format::ColumnIndex col_index, parquet::format::OffsetIndex offset_index,
+                    Type::type type_num, bool sorted, bool with_binarysearch, int64_t& count_pages_scanned,
+                    bool with_bloom_filter, bool with_page_bf) const {
+    // Unsorted variant: every page whose [min, max] range may contain the
+    // predicate is kept as a candidate.
+    switch (type_num) {
+      case Type::BOOLEAN: {
+        // Min/max page skipping makes little sense for booleans.
+        break;
+      }
+      case Type::INT32: {
+        int32_t v = *((int32_t*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const int32_t* page_min = (const int32_t*)col_index.min_values[itemindex].c_str();
+          const int32_t* page_max = (const int32_t*)col_index.max_values[itemindex].c_str();
+          if (*page_min <= v && v <= *page_max) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::INT64: {
+        int64_t v = *((int64_t*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const int64_t* page_min = (const int64_t*)col_index.min_values[itemindex].c_str();
+          const int64_t* page_max = (const int64_t*)col_index.max_values[itemindex].c_str();
+          if (*page_min <= v && v <= *page_max) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::INT96: {
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const float* page_min = (const float*)col_index.min_values[itemindex].c_str();
+          const float* page_max = (const float*)col_index.max_values[itemindex].c_str();
+          // v lies inside [min, max] iff |max - min| == |v - min| + |max - v|;
+          // compare within a tolerance to absorb floating-point error.
+          auto epsilon = std::numeric_limits<float>::epsilon();
+          float error_factor = 9 * pow(10, 15);
+          float max_diff = *page_max - *page_min;
+          if (fabs(max_diff - (fabs(v - *page_min) + fabs(*page_max - v))) <= error_factor * epsilon) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const double* page_min = (const double*)col_index.min_values[itemindex].c_str();
+          const double* page_max = (const double*)col_index.max_values[itemindex].c_str();
+          double max_diff = *page_max - *page_min;
+          auto epsilon = std::numeric_limits<double>::epsilon();
+          double error_factor = 9 * pow(10, 15);
+          if (fabs(max_diff - (fabs(v - *page_min) + fabs(*page_max - v))) <= error_factor * epsilon) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        const char* p = (const char*)predicate;
+        // Left-pad the predicate with '0' to FIXED_LENGTH so it compares
+        // lexicographically against the zero-padded page statistics.
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+
+        for (uint64_t itemindex = 0; itemindex < offset_index.page_locations.size(); itemindex++) {
+          const std::string& page_min = col_index.min_values[itemindex];
+          const std::string& page_max = col_index.max_values[itemindex];
+          if (test.compare(page_min) > 0 && test.compare(page_max) < 0) {
+            unsorted_min_index.push_back(itemindex);
+            count_pages_scanned = itemindex;
+          }
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    if (with_page_bf) {
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index,
+                                  unsorted_min_index, type_num, unsorted_row_index);
+    } else {
+      for (int64_t min_index : unsorted_min_index)
+        unsorted_row_index.push_back(offset_index.page_locations[min_index].first_row_index);
+    }
+  }
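The unsorted path cannot prune by ordering, so it degenerates to an interval-overlap scan. A standalone sketch under the same simplifying assumption of pre-decoded native statistics:

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the unsorted path: every page whose
// [min, max] range contains v stays a candidate.
std::vector<size_t> CollectCandidatePages(const std::vector<int64_t>& page_min,
                                          const std::vector<int64_t>& page_max,
                                          int64_t v) {
  std::vector<size_t> candidates;
  for (size_t i = 0; i < page_min.size(); ++i) {
    if (page_min[i] <= v && v <= page_max[i]) candidates.push_back(i);
  }
  return candidates;
}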
+  void GetPageWithoutIndex(std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_,
+                           void* predicate, int64_t& min_index, int64_t& row_index, Type::type type_num,
+                           bool with_binarysearch, int64_t& count_pages_scanned,
+                           parquet::BlockSplitBloomFilter& blf, bool with_bloom_filter, bool with_page_bf) const {
+    // Without a page index the only pruning available is the column-chunk
+    // bloom filter: a negative membership test rules the whole chunk out.
+    switch (type_num) {
+      case Type::BOOLEAN: {
+        break;
+      }
+      case Type::INT32: {
+        int32_t v = *((int32_t*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::INT64: {
+        int64_t v = *((int64_t*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::INT96: {
+        break;
+      }
+      case Type::FLOAT: {
+        float v = *((float*)predicate);
+        // The writer hashes float values truncated to whole numbers; mirror that here.
+        if (with_bloom_filter && !blf.FindHash(blf.Hash((float)(int64_t)v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::DOUBLE: {
+        double v = *((double*)predicate);
+        if (with_bloom_filter && !blf.FindHash(blf.Hash((double)(int64_t)v))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::BYTE_ARRAY: {
+        char* p = (char*)predicate;
+        // Remove leading zeroes in the predicate, if present, then left-pad it
+        // back to FIXED_LENGTH to match the stored representation.
+        int checkzero = 0;
+        while (p[checkzero] == '0') checkzero++;
+        p = p + checkzero;
+        char dest[FIXED_LENGTH + 1];
+        for (uint32_t i = 0; i < (FIXED_LENGTH - strlen(p)); i++) dest[i] = '0';
+        for (uint32_t i = (FIXED_LENGTH - strlen(p)); i < FIXED_LENGTH; i++) dest[i] = p[i - (FIXED_LENGTH - strlen(p))];
+        dest[FIXED_LENGTH] = '\0';
+        std::string test(dest);
+        ByteArray pba(test.size(), reinterpret_cast<const uint8_t*>(test.c_str()));
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(&pba))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      case Type::FIXED_LEN_BYTE_ARRAY: {
+        char* v = (char*)predicate;
+        ByteArray pba((uint32_t)strlen(v), reinterpret_cast<const uint8_t*>(v));
+        if (with_bloom_filter && !blf.FindHash(blf.Hash(&pba))) {
+          row_index = -1;
+          return;
+        }
+        break;
+      }
+      default: {
+        parquet::ParquetException::NYI("type reader not implemented");
+      }
+    }
+
+    /*if (with_page_bf)
+      page_bloom_filter_has_value(source_, properties_, predicate, offset_index, min_index, type_num, row_index);*/
+  }
+
+  void GetPageWithRowIndex(int64_t& page_index, parquet::format::OffsetIndex offset_index,
+                           int64_t& row_index) const {
+    // Map a row index back to the page that starts at it.
+    for (uint64_t i = 0; i < offset_index.page_locations.size(); i++) {
+      if (offset_index.page_locations[i].first_row_index == row_index) {
+        page_index = i;
+        break;
+      }
+    }
+  }
+
+  /// ---- Page filtering ----
+  /// A Parquet file can contain a so-called "page index". It has two parts: a column
+  /// index and an offset index. The column index contains statistics, such as minimum
+  /// and maximum values, for each page. The offset index describes page locations in
+  /// the Parquet file and top-level row ranges. The reader evaluates the min/max
+  /// conjuncts against the column index and determines the surviving pages with the
+  /// help of the offset index. It then configures the column readers to scan only the
+  /// pages and row ranges that have a chance of storing rows that pass the conjuncts.
+
+  bool HasPageIndex(ColumnChunkMetaData* col) {
+    int64_t column_index_offset = col->column_index_offset();
+    int64_t offset_index_offset = col->offset_index_offset();
+    int64_t column_index_length = col->column_index_length();
+    int64_t offset_index_length = col->offset_index_length();
+
+    int64_t ci_start = std::numeric_limits<int64_t>::max();
+    int64_t oi_start = std::numeric_limits<int64_t>::max();
+    int64_t ci_end = -1;
+    int64_t oi_end = -1;
+
+    if (column_index_offset && column_index_length) {
+      ci_start = std::min(ci_start, column_index_offset);
+      ci_end = std::max(ci_end, column_index_offset + column_index_length);
+    }
+    if (offset_index_offset && offset_index_length) {
+      oi_start = std::min(oi_start, offset_index_offset);
+      oi_end = std::max(oi_end, offset_index_offset + offset_index_length);
+    }
+    return oi_end != -1 && ci_end != -1;
+  }
+
   std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = col->crypto_metadata();
   // Column is encrypted only if crypto_metadata exists.
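The doc comment above describes turning surviving pages into row ranges via the offset index. An illustrative sketch of that idea, under assumed simplified types (RowRange and the decoded first_row_index vector are not part of the patch):

#include <cstdint>
#include <vector>

struct RowRange {
  int64_t first_row;  // inclusive
  int64_t last_row;   // exclusive
};

// Each surviving page ordinal i maps to the half-open row range
// [first_row_index[i], first_row_index[i + 1]), the last page ending at num_rows.
std::vector<RowRange> SurvivingRowRanges(const std::vector<int64_t>& first_row_index,
                                         int64_t num_rows,
                                         const std::vector<size_t>& surviving_pages) {
  std::vector<RowRange> ranges;
  for (size_t page : surviving_pages) {
    int64_t begin = first_row_index[page];
    int64_t end = (page + 1 < first_row_index.size()) ? first_row_index[page + 1] : num_rows;
    ranges.push_back({begin, end});
  }
  return ranges;
}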
@@ -166,9 +1272,70 @@ class SerializedRowGroup : public RowGroupReader::Contents {
       throw ParquetException("RowGroup is noted as encrypted but no file decryptor");
     }
 
+  void DeserializeColumnIndex(const ColumnChunkMetaData& col_chunk, parquet::format::ColumnIndex* column_index,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t ci_start = col_chunk.column_index_offset();
+    uint32_t length = col_chunk.column_index_length();
+    string_view page_buffer;
+
+    std::shared_ptr<ArrowInputStream> stream_ = properties_.GetStream(source_, ci_start, length);
+    PARQUET_THROW_NOT_OK(stream_->Peek(kColumnIndexReadSize, &page_buffer));
+    if (page_buffer.size() == 0) {
+      return;
+    }
+
+    DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(page_buffer.data()), &length, column_index);
+  }
+
+  void DeserializeOffsetIndex(const ColumnChunkMetaData& col_chunk, parquet::format::OffsetIndex* offset_index,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t oi_start = col_chunk.offset_index_offset();
+    uint32_t length = col_chunk.offset_index_length();
+    string_view page_buffer;
+
+    std::shared_ptr<ArrowInputStream> stream_ = properties_.GetStream(source_, oi_start, length);
+    PARQUET_THROW_NOT_OK(stream_->Peek(kOffsetIndexReadSize, &page_buffer));
+    if (page_buffer.size() == 0) {
+      return;
+    }
+
+    DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(page_buffer.data()), &length, offset_index);
+  }
+
+  void DeserializeBloomFilter(const ColumnChunkMetaData& col_chunk, parquet::BlockSplitBloomFilter& blf,
+                              std::shared_ptr<ArrowInputFile>& source_, ReaderProperties& properties_) {
+    int64_t blf_offset = col_chunk.bloom_filter_offset();
+    std::shared_ptr<ArrowInputStream> stream_ =
+        properties_.GetStream(source_, blf_offset, BloomFilter::kMaximumBloomFilterBytes);
+    blf = BlockSplitBloomFilter::Deserialize(stream_.get());
+  }
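All three helpers follow the same pattern: seek to the offset recorded in the column-chunk metadata and deserialize what is found there. If Peek on a stream is undesirable, the bytes can equally be read directly; a sketch using Arrow's pre-1.0 out-parameter ReadAt API (the helper name is illustrative):

#include <memory>
#include "arrow/buffer.h"
#include "arrow/io/interfaces.h"
#include "parquet/exception.h"

// Read exactly `length` bytes starting at `offset` from a random-access file.
std::shared_ptr<arrow::Buffer> ReadIndexBytes(
    const std::shared_ptr<arrow::io::RandomAccessFile>& file,
    int64_t offset, int64_t length) {
  std::shared_ptr<arrow::Buffer> out;
  PARQUET_THROW_NOT_OK(file->ReadAt(offset, length, &out));
  return out;
}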
+  std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+      int column_index, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool with_binarysearch, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+      parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+      BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+      float& blf_load_time, float& index_load_time) {
+    // Read column chunk from the file
+    auto col = row_group_metadata_->ColumnChunk(column_index);
+
+    auto sorting_columns = row_group_metadata_->sorting_columns();
+
+    int64_t col_start = col->data_page_offset();
+    if (col->has_dictionary_page() && col->dictionary_page_offset() > 0 &&
+        col_start > col->dictionary_page_offset()) {
+      col_start = col->dictionary_page_offset();
+    }
+
+    int64_t col_length = col->total_compressed_size();
+
+    if (with_bloom_filter) {
+      if (first_time_blf) {
+        auto start_time = std::chrono::high_resolution_clock::now();
+        DeserializeBloomFilter(*col, blf, source_, properties_);
+        auto end_time = std::chrono::high_resolution_clock::now();
+        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
+        first_time_blf = false;
+        blf_load_time = (float)duration.count();
+      }
+
+      GetPageWithoutIndex(source_, properties_, predicate, min_index, row_index, type_num,
+                          with_binarysearch, count_pages_scanned, blf, with_bloom_filter, with_page_bf);
+    }
+
+    if (row_index != -1 && with_index) {
+      bool has_page_index = HasPageIndex(col.get());
+      if (has_page_index) {
+        if (first_time_index) {
+          auto start_time = std::chrono::high_resolution_clock::now();
+          DeserializeColumnIndex(*col, &col_index, source_, properties_);
+          DeserializeOffsetIndex(*col, &offset_index, source_, properties_);
+          auto end_time = std::chrono::high_resolution_clock::now();
+          auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
+          index_load_time = (float)duration.count();
+          first_time_index = false;
+        }
+
+        total_num_pages = offset_index.page_locations.size();
+        last_first_row = offset_index.page_locations.back().first_row_index;
+        if (predicate_col == column_index) {
+          bool sorted = isSorted(col_index, offset_index, type_num);
+          if (sorted)
+            GetPageIndex(source_, properties_, predicate, min_index, row_index, col_index,
+                         offset_index, type_num, sorted, with_binarysearch, count_pages_scanned,
+                         with_bloom_filter, with_page_bf);
+          else
+            GetPageIndex(source_, properties_, predicate, unsorted_min_index, unsorted_row_index,
+                         col_index, offset_index, type_num, sorted, with_binarysearch,
+                         count_pages_scanned, with_bloom_filter, with_page_bf);
+        } else {
+          GetPageWithRowIndex(min_index, offset_index, row_index);
+        }
+      }
+    }
+
+    // PARQUET-816 workaround for old files created by older parquet-mr
+    const ApplicationVersion& version = file_metadata_->writer_version();
+    if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
+      // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
+      // dictionary page header size in total_compressed_size and total_uncompressed_size
+      // (see IMPALA-694). We add padding to compensate.
+      int64_t size = -1;
+      PARQUET_THROW_NOT_OK(source_->GetSize(&size));
+      int64_t bytes_remaining = size - (col_start + col_length);
+      int64_t padding = std::min(kMaxDictHeaderSize, bytes_remaining);
+      col_length += padding;
+    }
+
+    std::shared_ptr<ArrowInputStream> stream =
+        properties_.GetStream(source_, col_start, col_length);
+
+    return PageReader::Open(stream, col->num_values(), col->compression(),
+                            properties_.memory_pool());
+  }
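The function above times the bloom filter and page-index loads with std::chrono. A minimal sketch of that pattern, factored out for reuse; note the subtraction order (end minus start), since reversing it yields a negative duration:

#include <chrono>

// Measure the elapsed wall time of a callable, in microseconds.
template <typename Fn>
float TimeMicros(Fn&& fn) {
  auto start_time = std::chrono::high_resolution_clock::now();
  fn();
  auto end_time = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
  return static_cast<float>(duration.count());
}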
+  std::unique_ptr<PageReader> GetColumnPageReader(int i) override {
+    // Read column chunk from the file
+    auto col = row_group_metadata_->ColumnChunk(i);
+
+    int64_t col_start = col->data_page_offset();
+    if (col->has_dictionary_page() && col->dictionary_page_offset() > 0 &&
+        col_start > col->dictionary_page_offset()) {
+      col_start = col->dictionary_page_offset();
+    }
+
+    int64_t col_length = col->total_compressed_size();
+
+    bool has_page_index = HasPageIndex(col.get());
+    if (has_page_index) {
+      parquet::format::ColumnIndex col_index;
+      parquet::format::OffsetIndex offset_index;
+      DeserializeColumnIndex(*col, &col_index, source_, properties_);
+      DeserializeOffsetIndex(*col, &offset_index, source_, properties_);
+    }
+
+    // PARQUET-816 workaround for old files created by older parquet-mr
+    const ApplicationVersion& version = file_metadata_->writer_version();
+    if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
+      // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
+      // dictionary page header size in total_compressed_size and total_uncompressed_size
+      // (see IMPALA-694). We add padding to compensate.
+      int64_t size = -1;
+      PARQUET_THROW_NOT_OK(source_->GetSize(&size));
+      int64_t bytes_remaining = size - (col_start + col_length);
+      int64_t padding = std::min(kMaxDictHeaderSize, bytes_remaining);
+      col_length += padding;
+    }
+
+    std::shared_ptr<ArrowInputStream> stream =
+        properties_.GetStream(source_, col_start, col_length);
+    return PageReader::Open(stream, col->num_values(), col->compression(),
+                            properties_.memory_pool());
+  }
 
  private:
diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h
index 79af3cd2b35..a85da6d28dc 100644
--- a/cpp/src/parquet/file_reader.h
+++ b/cpp/src/parquet/file_reader.h
@@ -26,6 +26,7 @@
 #include "parquet/metadata.h"  // IWYU pragma: keep
 #include "parquet/platform.h"
 #include "parquet/properties.h"
+#include "parquet/bloom_filter.h"
 
 namespace parquet {
@@ -35,6 +36,8 @@ class PageReader;
 class RandomAccessSource;
 class RowGroupMetaData;
 
+static const uint32_t FIXED_LENGTH = 124;
+
 class PARQUET_EXPORT RowGroupReader {
  public:
   // Forward declare a virtual class 'Contents' to aid dependency injection and more
@@ -43,6 +46,12 @@ class PARQUET_EXPORT RowGroupReader {
   struct Contents {
     virtual ~Contents() {}
     virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
+    virtual std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+        int i, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+        Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+        int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+        std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+        parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+        BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+        float& blf_load_time, float& index_load_time) = 0;
     virtual const RowGroupMetaData* metadata() const = 0;
     virtual const ReaderProperties* properties() const = 0;
   };
@@ -56,11 +65,33 @@ class PARQUET_EXPORT RowGroupReader {
   // column. Ownership is shared with the RowGroupReader.
   std::shared_ptr<ColumnReader> Column(int i);
 
+  std::shared_ptr<ColumnReader> ColumnWithIndex(
+      int i, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index);
+
   std::unique_ptr<PageReader> GetColumnPageReader(int i);
+  std::unique_ptr<PageReader> GetColumnPageReaderWithIndex(
+      int column_index, void* predicate, int64_t& min_index, int predicate_col, int64_t& row_index,
+      Type::type type_num, bool with_index, bool binary_search, int64_t& count_pages_scanned,
+      int64_t& total_num_pages, int64_t& last_first_row, bool with_bloom_filter, bool with_page_bf,
+      std::vector<int64_t>& unsorted_min_index, std::vector<int64_t>& unsorted_row_index,
+      parquet::format::ColumnIndex& col_index, parquet::format::OffsetIndex& offset_index,
+      BlockSplitBloomFilter& blf, bool& first_time_blf, bool& first_time_index,
+      float& blf_load_time, float& index_load_time);
+
+  float GetBLFLoadTime() { return blf_load_time; }
+
+  float GetIndexLoadTime() { return index_load_time; }
+
  private:
   // Holds a pointer to an instance of Contents implementation
   std::unique_ptr<Contents> contents_;
+  parquet::format::ColumnIndex col_index;
+  parquet::format::OffsetIndex offset_index;
+  BlockSplitBloomFilter blf;
+  bool first_time_blf = true;
+  bool first_time_index = true;
+  float blf_load_time = 0.0;
+  float index_load_time = 0.0;
 };
 
 class PARQUET_EXPORT ParquetFileReader {
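A hypothetical caller flow for the ColumnWithIndex API declared above, assuming an int64 predicate column 0 in a file with a single row group; the file name and all parameter values are illustrative only:

#include <cstdint>
#include <memory>
#include <vector>
#include "parquet/file_reader.h"

void ExampleLookup() {
  int64_t predicate = 42;
  int64_t min_index = -1, row_index = 0, pages_scanned = 0;
  int64_t total_pages = 0, last_first_row = 0;
  std::vector<int64_t> unsorted_min_index, unsorted_row_index;

  std::unique_ptr<parquet::ParquetFileReader> reader =
      parquet::ParquetFileReader::OpenFile("example.parquet");
  std::shared_ptr<parquet::RowGroupReader> row_group = reader->RowGroup(0);
  std::shared_ptr<parquet::ColumnReader> col = row_group->ColumnWithIndex(
      0, &predicate, min_index, 0, row_index, parquet::Type::INT64,
      /*with_index=*/true, /*binary_search=*/true, pages_scanned,
      total_pages, last_first_row, /*with_bloom_filter=*/false,
      /*with_page_bf=*/false, unsorted_min_index, unsorted_row_index);
  // row_index now holds the first row of the page that may contain the value,
  // or -1 if every page was ruled out.
}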
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index 63dabe76c95..5435856e957 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -50,8 +50,52 @@ void RowGroupWriter::Close() {
   }
 }
 
+void RowGroupWriter::CloseWithIndex(bool use_index, bool with_bf) {
+  if (contents_) {
+    contents_->CloseWithIndex(use_index, with_bf);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(int32_t values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(int64_t values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(float values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(double values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::AppendRowGroupBloomFilter(ByteArray* values) {
+  if (contents_) {
+    contents_->AppendRowGroupBloomFilter(values);
+  }
+}
+
+void RowGroupWriter::InitBloomFilter(int num_rows, uint32_t& num_bytes, float fpp) {
+  if (contents_) {
+    contents_->InitBloomFilter(num_rows, num_bytes, fpp);
+  }
+}
+
 ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
 
+ColumnWriter* RowGroupWriter::NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) {
+  return contents_->NextColumnWithIndex(num_bytes, with_index, with_bf, fpp);
+}
+
 ColumnWriter* RowGroupWriter::column(int i) { return contents_->column(i); }
 
 int64_t RowGroupWriter::total_compressed_bytes() const {
@@ -93,7 +137,9 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
         next_column_index_(0),
         num_rows_(0),
         buffered_row_group_(buffered_row_group),
-        file_encryptor_(file_encryptor) {
+        file_encryptor_(file_encryptor),
+        blf_(metadata_->num_columns()) {
     if (buffered_row_group) {
       InitColumns();
     } else {
@@ -143,6 +189,49 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
     return column_writers_[0].get();
   }
 
+  ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) override {
+    use_index = true;
+    if (buffered_row_group_) {
+      throw ParquetException(
+          "NextColumn() is not supported when a RowGroup is written by size");
+    }
+
+    if (column_writers_[0]) {
+      CheckRowsWritten();
+    }
+
+    // Throws an error if more columns are being written
+    auto col_meta = metadata_->NextColumnChunk();
+
+    if (column_writers_[0]) {
+      total_bytes_written_ += (with_index) ? column_writers_[0]->CloseWithIndex()
+                                           : column_writers_[0]->Close();
+      sink_->Tell(&file_pos_);
+      if (with_index)
+        column_writers_[0]->WriteIndex(file_pos_, column_index_offset, offset_index_offset);
+    }
+
+    if (column_writers_[0] && with_bf) {
+      // Initialize the next column's bloom filter. OptimalNumOfBits returns
+      // bits while Init takes bytes, hence the division by eight.
+      num_bytes = blf_[next_column_index_].OptimalNumOfBits(column_writers_[0]->rows_written(), fpp) / 8;
+      blf_[next_column_index_].Init(num_bytes);
+
+      // Keep the current column writer around so its bloom filter offset can
+      // be patched in later.
+      all_used_cws_.push_back(column_writers_[0]);
+    }
+
+    ++next_column_index_;
+
+    const ColumnDescriptor* column_descr = col_meta->descr();
+    std::unique_ptr<PageWriter> pager =
+        PageWriter::Open(sink_, properties_->compression(column_descr->path()), col_meta,
+                         properties_->memory_pool());
+    column_writers_[0] = ColumnWriter::Make(col_meta, std::move(pager), properties_);
+
+    return column_writers_[0].get();
+  }
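The sizing logic NextColumnWithIndex applies per column can be exercised on its own. A sketch, assuming a populated value vector (the helper name and the bits-to-bytes conversion comment reflect the parquet API as I understand it):

#include <cstdint>
#include <vector>
#include "parquet/bloom_filter.h"

// Size a block-split bloom filter from the expected value count and target
// false-positive rate, then populate it.
void BuildColumnBloomFilter(parquet::BlockSplitBloomFilter* blf,
                            const std::vector<int64_t>& values, double fpp) {
  uint32_t num_bits = parquet::BlockSplitBloomFilter::OptimalNumOfBits(
      static_cast<uint32_t>(values.size()), fpp);
  blf->Init(num_bits / 8);  // Init takes bytes, OptimalNumOfBits returns bits
  for (int64_t v : values) blf->InsertHash(blf->Hash(v));
}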
+
   ColumnWriter* column(int i) override {
     if (!buffered_row_group_) {
       throw ParquetException(
@@ -185,18 +274,97 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
     for (size_t i = 0; i < column_writers_.size(); i++) {
       if (column_writers_[i]) {
         total_bytes_written_ += column_writers_[i]->Close();
-        column_writers_[i].reset();
       }
     }
+    column_writers_.clear();
+
+    // Ensures all columns have been written
+    metadata_->set_num_rows(num_rows_);
+    metadata_->Finish(total_bytes_written_);
+  }
+
+  void CloseWithIndex(bool use_index, bool with_bf) override {
+    if (!closed_) {
+      closed_ = true;
+      CheckRowsWritten();
+      for (size_t i = 0; i < column_writers_.size(); i++) {
+        if (column_writers_[i]) {
+          total_bytes_written_ += (!use_index) ? column_writers_[i]->Close()
+                                               : column_writers_[i]->CloseWithIndex();
+          if (use_index)
+            column_writers_[i]->WriteIndex(0, column_index_offset, offset_index_offset);
+
+          all_used_cws_.push_back(column_writers_[i]);
+          column_writers_[i].reset();
+        }
+      }
+
+      if (with_bf)
+        WriteBloomFilterOffsets();
+
+      column_writers_.clear();
+
+      // Ensures all columns have been written
+      metadata_->set_num_rows(num_rows_);
+      metadata_->Finish(total_bytes_written_, row_group_ordinal_);
+    }
+  }
+
+  void AppendRowGroupBloomFilter(int32_t values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void AppendRowGroupBloomFilter(int64_t values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void AppendRowGroupBloomFilter(float values) override {
+    // Values are truncated to whole numbers before hashing; the reader applies
+    // the same truncation when probing.
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash((float)(int64_t)values));
+  }
+
+  void AppendRowGroupBloomFilter(double values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash((double)(int64_t)values));
+  }
+
+  void AppendRowGroupBloomFilter(ByteArray* values) override {
+    blf_[next_column_index_ - 1].InsertHash(blf_[next_column_index_ - 1].Hash(values));
+  }
+
+  void InitBloomFilter(int num_rows, uint32_t& num_bytes, float fpp) override {
+    // First-time initialization of the first column's filter; this cannot
+    // happen in NextColumnChunk. OptimalNumOfBits returns bits, Init takes bytes.
+    num_bytes = blf_[next_column_index_].OptimalNumOfBits(num_rows, fpp) / 8;
+    blf_[next_column_index_].Init(num_bytes);
+  }
+
+  void WriteBloomFilterOffsets() {
+    int64_t filepos;
+    for (size_t i = 0; i < all_used_cws_.size(); i++) {
+      sink_->Tell(&filepos);
+      if (all_used_cws_[i]) {
+        if (false) {  // Writing a BloomFilterHeader is disabled for now.
+          format::BloomFilterHeader blfh;
+          blfh.__set_numBytes(blf_[i].GetBitsetSize());
+          blfh.__set_hash(blf_[i].GetHashStrategy());
+          blfh.__set_algorithm(blf_[i].GetHashAlgorithm());
+          blfh.__set_compression(blf_[i].GetBFCompression());
+
+          std::unique_ptr<ThriftSerializer> thrift_serializer_;
+          thrift_serializer_.reset(new ThriftSerializer);
+          thrift_serializer_->Serialize(&blfh, sink_.get());
+        }
+
+        blf_[i].WriteTo(sink_.get());
+
+        all_used_cws_[i]->WriteBloomFilterOffset(filepos);
+        all_used_cws_[i].reset();
+      }
+    }
+  }
+
  private:
   std::shared_ptr<ArrowOutputStream> sink_;
   mutable RowGroupMetaDataBuilder* metadata_;
@@ -208,6 +376,8 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
   mutable int64_t num_rows_;
   bool buffered_row_group_;
   InternalFileEncryptor* file_encryptor_;
+  bool use_index = false;
 
   void CheckRowsWritten() const {
     // verify when only one column is written at a time
@@ -252,6 +422,11 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
   }
 
   std::vector<std::shared_ptr<ColumnWriter>> column_writers_;
+  std::vector<std::shared_ptr<ColumnWriter>> all_used_cws_;
+  int64_t column_index_offset = 0;
+  int64_t offset_index_offset = 0;
+  std::vector<BlockSplitBloomFilter> blf_;
+  int64_t file_pos_;
 };
 
 // ----------------------------------------------------------------------
@@ -296,6 +471,23 @@ class FileSerializer : public ParquetFileWriter::Contents {
     }
   }
 
+  void CloseWithIndex(bool use_index, bool with_bf) override {
+    if (is_open_) {
+      // If any functions here raise an exception, we set is_open_ to be false
+      // so that this does not get called again (possibly causing segfault)
+      is_open_ = false;
+      if (row_group_writer_) {
+        num_rows_ += row_group_writer_->num_rows();
+        row_group_writer_->CloseWithIndex(use_index, with_bf);
+      }
+      row_group_writer_.reset();
+
+      // Write magic bytes and metadata
+      file_metadata_ = metadata_->Finish();
+      WriteFileMetaData(*file_metadata_, sink_.get());
+    }
+  }
+
   int num_columns() const override { return schema_.num_columns(); }
 
   int num_row_groups() const override { return num_row_groups_; }
@@ -555,6 +747,14 @@ void ParquetFileWriter::Close() {
   }
 }
 
+void ParquetFileWriter::CloseWithIndex(bool use_index, bool with_bf) {
+  if (contents_) {
+    contents_->CloseWithIndex(use_index, with_bf);
+    file_metadata_ = contents_->metadata();
+    contents_.reset();
+  }
+}
+
 RowGroupWriter* ParquetFileWriter::AppendRowGroup() {
   return contents_->AppendRowGroup();
 }
@@ -567,6 +767,7 @@ RowGroupWriter* ParquetFileWriter::AppendRowGroup(int64_t num_rows) {
   return AppendRowGroup();
 }
 
 const std::shared_ptr<WriterProperties>& ParquetFileWriter::properties() const {
   return contents_->properties();
 }
diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h
index 8caa5efbab5..48d6f6e80b8 100644
--- a/cpp/src/parquet/file_writer.h
+++ b/cpp/src/parquet/file_writer.h
@@ -47,12 +47,29 @@ class PARQUET_EXPORT RowGroupWriter {
     // to be used only with ParquetFileWriter::AppendRowGroup
     virtual ColumnWriter* NextColumn() = 0;
+
+    virtual ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp) = 0;
+
+    virtual void AppendRowGroupBloomFilter(int32_t values) = 0;
+    virtual void AppendRowGroupBloomFilter(int64_t values) = 0;
+    virtual void AppendRowGroupBloomFilter(float values) = 0;
+    virtual void AppendRowGroupBloomFilter(double values) = 0;
+    virtual void AppendRowGroupBloomFilter(ByteArray* values) = 0;
+
+    virtual void InitBloomFilter(int num_rows, uint32_t& blf_num_bits, float fpp) = 0;
+
     // to be used only with ParquetFileWriter::AppendBufferedRowGroup
     virtual ColumnWriter* column(int i) = 0;
 
     virtual int current_column() const = 0;
     virtual void Close() = 0;
+    virtual void CloseWithIndex(bool use_index, bool with_bf) = 0;
 
     // total bytes written by the page writer
     virtual int64_t total_bytes_written() const = 0;
     // total bytes still compressed but not written
@@ -69,10 +86,30 @@ class PARQUET_EXPORT RowGroupWriter {
   /// directly written to the sink, once a new column is started, the contents
   /// of the previous one cannot be modified anymore.
   ColumnWriter* NextColumn();
+
+  ColumnWriter* NextColumnWithIndex(uint32_t& num_bytes, bool with_index, bool with_bf, float fpp);
+
+  void AppendRowGroupBloomFilter(int32_t values);
+  void AppendRowGroupBloomFilter(int64_t values);
+  void AppendRowGroupBloomFilter(float values);
+  void AppendRowGroupBloomFilter(double values);
+  void AppendRowGroupBloomFilter(ByteArray* values);
+
+  void InitBloomFilter(int num_rows, uint32_t& blf_num_bits, float fpp);
+
   /// Index of currently written column. Equal to -1 if NextColumn()
   /// has not been called yet.
   int current_column();
   void Close();
+  void CloseWithIndex(bool use_index, bool with_bf);
 
   int num_columns() const;
@@ -141,6 +178,8 @@ class PARQUET_EXPORT ParquetFileWriter {
     // Perform any cleanup associated with the file contents
     virtual void Close() = 0;
+    virtual void CloseWithIndex(bool use_index, bool with_bf) = 0;
+
     /// \note Deprecated since 1.3.0
     RowGroupWriter* AppendRowGroup(int64_t num_rows);
@@ -187,6 +226,8 @@ class PARQUET_EXPORT ParquetFileWriter {
   void Open(std::unique_ptr<Contents> contents);
   void Close();
+  void CloseWithIndex(bool use_index, bool with_bf);
+
   // Construct a RowGroupWriter for the indicated number of rows.
   //
   // Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
@@ -209,6 +250,7 @@ class PARQUET_EXPORT ParquetFileWriter {
   /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
   RowGroupWriter* AppendBufferedRowGroup();
 
   /// Number of columns.
   ///
   /// This number is fixed during the lifetime of the writer as it is determined via
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index cdfe505573f..c8e2bbdbdec 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -323,6 +323,14 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
     }
   }
 
+  inline int64_t column_index_length() const { return column_->column_index_length; }
+
+  inline int64_t bloom_filter_offset() const { return column_->meta_data.bloom_filter_offset; }
+
  private:
   mutable std::shared_ptr<Statistics> possible_stats_;
   std::vector<Encoding::type> encodings_;
@@ -417,6 +425,23 @@ std::unique_ptr<ColumnCryptoMetaData> ColumnChunkMetaData::crypto_metadata() const {
   return impl_->crypto_metadata();
 }
 
+int64_t ColumnChunkMetaData::offset_index_offset() const { return impl_->offset_index_offset(); }
+
+int64_t ColumnChunkMetaData::offset_index_length() const { return impl_->offset_index_length(); }
+
+int64_t ColumnChunkMetaData::column_index_length() const { return impl_->column_index_length(); }
+
+int64_t ColumnChunkMetaData::bloom_filter_offset() const { return impl_->bloom_filter_offset(); }
+
 // row-group metadata
 class RowGroupMetaData::RowGroupMetaDataImpl {
  public:
@@ -440,6 +465,7 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
   inline int64_t total_compressed_size() const { return row_group_->total_compressed_size; }
+  inline std::vector<format::SortingColumn> sorting_columns() { return row_group_->sorting_columns; }
 
   inline const SchemaDescriptor* schema() const { return schema_; }
@@ -487,6 +513,8 @@ int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
 
 int64_t RowGroupMetaData::file_offset() const { return impl_->file_offset(); }
 
+std::vector<format::SortingColumn> RowGroupMetaData::sorting_columns() const { return impl_->sorting_columns(); }
+
 const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
 
 std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
@@ -1127,6 +1155,18 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
     }
   }
 
+  void WriteIndex(int64_t& file_pos_, int64_t& column_index_offset, int64_t& offset_index_offset,
+                  uint32_t& ci_len, uint32_t& oi_len) {
+    column_chunk_->__set_column_index_offset(file_pos_ + column_index_offset);
+    column_chunk_->__set_column_index_length(ci_len);
+    column_chunk_->__set_offset_index_offset(file_pos_ + offset_index_offset);
+    column_chunk_->__set_offset_index_length(oi_len);
+    file_pos_ += offset_index_offset + oi_len;
+  }
+
+  void WriteBloomFilterOffset(int64_t& bloom_filter_offset) {
+    column_chunk_->meta_data.__set_bloom_filter_offset(bloom_filter_offset);
+  }
+
   void WriteTo(::arrow::io::OutputStream* sink) {
     ThriftSerializer serializer;
     serializer.Serialize(column_chunk_, sink);
@@ -1203,6 +1243,14 @@ void ColumnChunkMetaDataBuilder::WriteTo(::arrow::io::OutputStream* sink) {
   impl_->WriteTo(sink);
 }
 
+void ColumnChunkMetaDataBuilder::WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset,
+                                            uint32_t& ci_len, uint32_t& oi_len) {
+  impl_->WriteIndex(file_pos_, ci_offset, oi_offset, ci_len, oi_len);
+}
+
+void ColumnChunkMetaDataBuilder::WriteBloomFilterOffset(int64_t& file_pos_) {
+  impl_->WriteBloomFilterOffset(file_pos_);
+}
+
 const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const { return impl_->descr(); }
@@ -1239,6 +1287,7 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
     return column_builder_ptr;
   }
 
+  int current_column() { return next_column_ - 1; }
 
   void Finish(int64_t total_bytes_written, int16_t row_group_ordinal) {
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index 0186342d7f6..147714ed7c8 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -25,10 +25,16 @@
 #include <vector>
 
 #include "arrow/util/key_value_metadata.h"
 #include "parquet/platform.h"
 #include "parquet/properties.h"
 #include "parquet/schema.h"
 #include "parquet/types.h"
+#include "parquet/thrift.h"
+
+#include "arrow/util/string_view.h"
+
+using arrow::util::string_view;
 
 namespace parquet {
@@ -167,7 +173,15 @@ class PARQUET_EXPORT ColumnChunkMetaData {
   int64_t index_page_offset() const;
   int64_t total_compressed_size() const;
   int64_t total_uncompressed_size() const;
   std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;
+  int64_t column_index_offset() const;
+  int64_t offset_index_offset() const;
+  int64_t column_index_length() const;
+  int64_t offset_index_length() const;
+  int64_t bloom_filter_offset() const;
 
  private:
   explicit ColumnChunkMetaData(
@@ -180,6 +194,39 @@ class PARQUET_EXPORT ColumnChunkMetaData {
 };
 
 /// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
+enum BoundaryOrder {
+  UNORDERED = 0,
+  ASCENDING = 1,
+  DESCENDING = 2
+};
+
+class PARQUET_EXPORT PageLocation {
+ public:
+  int64_t offset;
+  int32_t compressed_page_size;
+  int64_t first_row_index;
+};
+
+class PARQUET_EXPORT ColumnIndex : format::ColumnIndex {
+ public:
+  static std::unique_ptr<ColumnIndex> Make(std::vector<bool> null_pages,
+                                           std::vector<std::string> min_values,
+                                           std::vector<std::string> max_values,
+                                           BoundaryOrder boundary_order,
+                                           std::vector<int64_t> null_counts);
+  uint32_t read(apache::thrift::protocol::TProtocol* tp) { return parquet::format::ColumnIndex::read(tp); }
+};
+
+class PARQUET_EXPORT OffsetIndex : format::OffsetIndex {
+ public:
+  static std::unique_ptr<OffsetIndex> Make(std::vector<PageLocation> page_locations);
+  uint32_t read(apache::thrift::protocol::TProtocol* tp) { return parquet::format::OffsetIndex::read(tp); }
+};
+
 class PARQUET_EXPORT RowGroupMetaData {
  public:
   /// \brief Create a RowGroupMetaData from a serialized thrift message.
@@ -211,16 +258,37 @@ class PARQUET_EXPORT RowGroupMetaData {
   /// \brief Total byte size of all the uncompressed column data in this row group.
   int64_t total_byte_size() const;
   /// \brief Byte offset from beginning of file to first page (data or
   /// dictionary) in this row group
   ///
   /// The file_offset field that this method exposes is optional. This method
   /// will return 0 if that field is not set to a meaningful value.
   int64_t file_offset() const;
   // Return const-pointer to make it clear that this object is not to be copied
   const SchemaDescriptor* schema() const;
   // Indicate if all of the RowGroup's ColumnChunks can be decompressed.
   bool can_decompress() const;
+  std::vector<format::SortingColumn> sorting_columns() const;
 
  private:
   explicit RowGroupMetaData(
@@ -403,6 +471,10 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
       const std::map<Encoding::type, int32_t>& data_encoding_stats_,
       const std::shared_ptr<Encryptor>& encryptor = NULLPTR);
 
+  void WriteIndex(int64_t& file_pos_, int64_t& ci_offset, int64_t& oi_offset, uint32_t& ci_len, uint32_t& oi_len);
+
+  void WriteBloomFilterOffset(int64_t& file_pos);
+
   // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
   const void* contents() const;
diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift
index de875f7a559..daa07cfe310 100644
--- a/cpp/src/parquet/parquet.thrift
+++ b/cpp/src/parquet/parquet.thrift
@@ -915,6 +915,9 @@ struct OffsetIndex {
    * that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
    */
   1: required list<PageLocation> page_locations
+
+  /** Page-level bloom filter offsets; optional so that existing readers stay compatible. **/
+  2: optional list<i64> page_bloom_filter_offsets
 }
 
 /**
@@ -953,6 +956,7 @@ struct ColumnIndex {
 
   /** A list containing the number of null values for each page **/
   5: optional list<i64> null_counts
+
 }
 
 struct AesGcmV1 {