Skip to content

Commit 52c2788

Browse files
authored
Merge branch 'main' into spark_abs
2 parents 029306e + 3863267 commit 52c2788

File tree

152 files changed

+2624
-2287
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+2624
-2287
lines changed

.github/workflows/audit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
steps:
4343
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
4444
- name: Install cargo-audit
45-
uses: taiki-e/install-action@6cc14f7f2f4b3129aff07a8b071d2d4f2733465d # v2.62.50
45+
uses: taiki-e/install-action@0be4756f42223b67aa4b7df5effad59010cbf4b9 # v2.62.51
4646
with:
4747
tool: cargo-audit
4848
- name: Run audit check

.github/workflows/rust.yml

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,21 @@ jobs:
271271
runs-on: ubuntu-latest
272272
container:
273273
image: amd64/rust
274+
volumes:
275+
- /usr/local:/host/usr/local
274276
steps:
277+
- name: Remove unnecessary preinstalled software
278+
run: |
279+
echo "Disk space before cleanup:"
280+
df -h
281+
# remove tool cache: about 8.5GB (github has host /opt/hostedtoolcache mounted as /__t)
282+
rm -rf /__t/* || true
283+
# remove Haskell runtime: about 6.3GB (host /usr/local/.ghcup)
284+
rm -rf /host/usr/local/.ghcup || true
285+
# remove Android library: about 7.8GB (host /usr/local/lib/android)
286+
rm -rf /host/usr/local/lib/android || true
287+
echo "Disk space after cleanup:"
288+
df -h
275289
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
276290
with:
277291
submodules: true
@@ -434,7 +448,7 @@ jobs:
434448
sudo apt-get update -qq
435449
sudo apt-get install -y -qq clang
436450
- name: Setup wasm-pack
437-
uses: taiki-e/install-action@6cc14f7f2f4b3129aff07a8b071d2d4f2733465d # v2.62.50
451+
uses: taiki-e/install-action@0be4756f42223b67aa4b7df5effad59010cbf4b9 # v2.62.51
438452
with:
439453
tool: wasm-pack
440454
- name: Run tests with headless mode
@@ -761,7 +775,7 @@ jobs:
761775
- name: Setup Rust toolchain
762776
uses: ./.github/actions/setup-builder
763777
- name: Install cargo-msrv
764-
uses: taiki-e/install-action@6cc14f7f2f4b3129aff07a8b071d2d4f2733465d # v2.62.50
778+
uses: taiki-e/install-action@0be4756f42223b67aa4b7df5effad59010cbf4b9 # v2.62.51
765779
with:
766780
tool: cargo-msrv
767781

@@ -806,4 +820,4 @@ jobs:
806820
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
807821
with:
808822
persist-credentials: false
809-
- uses: crate-ci/typos@1af53e3774f068183ffd0c7193eb061a2b65a531 # v1.39.1
823+
- uses: crate-ci/typos@626c4bedb751ce0b7f03262ca97ddda9a076ae1c # v1.39.2

datafusion-examples/README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ cargo run --example dataframe
5151
- [`examples/udf/advanced_udwf.rs`](examples/udf/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF)
5252
- [`examples/data_io/parquet_advanced_index.rs`](examples/data_io/parquet_advanced_index.rs): Creates a detailed secondary index that covers the contents of several parquet files
5353
- [`examples/udf/async_udf.rs`](examples/udf/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF)
54-
- [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control)
54+
- [`examples/query_planning/analyzer_rule.rs`](examples/query_planning/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control)
5555
- [`examples/data_io/catalog.rs`](examples/data_io/catalog.rs): Register the table into a custom catalog
5656
- [`examples/data_io/json_shredding.rs`](examples/data_io/json_shredding.rs): Shows how to implement custom filter rewriting for JSON shredding
5757
- [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization
@@ -65,22 +65,23 @@ cargo run --example dataframe
6565
- [`examples/builtin_functions/date_time`](examples/builtin_functions/date_time.rs): Examples of date-time related functions and queries
6666
- [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter
6767
- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs
68-
- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
68+
- [`examples/query_planning/expr_api.rs`](examples/query_planning/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
6969
- [`examples/custom_data_source/file_stream_provider.rs`](examples/custom_data_source/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks.
7070
- [`flight/sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from Flight and and FlightSQL (e.g. JDBC) clients
7171
- [`examples/builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
7272
- [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages
7373
- [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
74-
- [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
74+
- [`examples/query_planning/optimizer_rule.rs`](examples/query_planning/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
7575
- [`examples/data_io/parquet_embedded_index.rs`](examples/data_io/parquet_embedded_index.rs): Store a custom index inside a Parquet file and use it to speed up queries
7676
- [`examples/data_io/parquet_encrypted.rs`](examples/data_io/parquet_encrypted.rs): Read and write encrypted Parquet files using DataFusion
7777
- [`examples/data_io/parquet_encrypted_with_kms.rs`](examples/data_io/parquet_encrypted_with_kms.rs): Read and write encrypted Parquet files using an encryption factory
7878
- [`examples/data_io/parquet_index.rs`](examples/data_io/parquet_index.rs): Create an secondary index over several parquet files and use it to speed up queries
7979
- [`examples/data_io/parquet_exec_visitor.rs`](examples/data_io/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution
80-
- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`.
81-
- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan`
82-
- [`planner_api.rs`](examples/planner_api.rs) APIs to manipulate logical and physical plans
83-
- [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics
80+
- [`examples/query_planning/parse_sql_expr.rs`](examples/query_planning/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`.
81+
- [`examples/query_planning/plan_to_sql.rs`](examples/query_planning/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan`
82+
- [`examples/query_planning/planner_api.rs`](examples/query_planning/planner_api.rs) APIs to manipulate logical and physical plans
83+
- [`examples/query_planning/pruning.rs`](examples/query_planning/pruning.rs): Use pruning to rule out files based on statistics
84+
- [`examples/query_planning/thread_pools.rs`](examples/query_planning/thread_pools.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages and shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
8485
- [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3
8586
- [`examples/data_io/query_http_csv.rs`](examples/data_io/query_http_csv.rs): Configure `object_store` and run a query against files via HTTP
8687
- [`examples/builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions

datafusion-examples/examples/analyzer_rule.rs renamed to datafusion-examples/examples/query_planning/analyzer_rule.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ use std::sync::{Arc, Mutex};
3535
/// level access control scheme by introducing a filter to the query.
3636
///
3737
/// See [optimizer_rule.rs] for an example of a optimizer rule
38-
#[tokio::main]
39-
pub async fn main() -> Result<()> {
38+
pub async fn analyzer_rule() -> Result<()> {
4039
// AnalyzerRules run before OptimizerRules.
4140
//
4241
// DataFusion includes several built in AnalyzerRules for tasks such as type

datafusion-examples/examples/expr_api.rs renamed to datafusion-examples/examples/query_planning/expr_api.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ use datafusion::prelude::*;
5555
/// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`]
5656
/// 6. Get the types of the expressions: [`expression_type_demo`]
5757
/// 7. Apply type coercion to expressions: [`type_coercion_demo`]
58-
#[tokio::main]
59-
async fn main() -> Result<()> {
58+
pub async fn expr_api() -> Result<()> {
6059
// The easiest way to do create expressions is to use the
6160
// "fluent"-style API:
6261
let expr = col("a") + lit(5);
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! # These are all internal mechanics of the query planning and optimization layers
19+
//!
20+
//! These examples demonstrate internal mechanics of the query planning and optimization layers.
21+
//!
22+
//! ## Usage
23+
//! ```bash
24+
//! cargo run --example query_planning -- [analyzer_rule|expr_api|optimizer_rule|parse_sql_expr|plan_to_sql|planner_api|pruning|thread_pools]
25+
//! ```
26+
//!
27+
//! Each subcommand runs a corresponding example:
28+
//! - `analyzer_rule` — use a custom AnalyzerRule to change a query's semantics (row level access control)
29+
//! - `expr_api` — create, execute, simplify, analyze and coerce `Expr`s
30+
//! - `optimizer_rule` — use a custom OptimizerRule to replace certain predicates
31+
//! - `parse_sql_expr` — parse SQL text into DataFusion `Expr`
32+
//! - `plan_to_sql` — generate SQL from DataFusion `Expr` and `LogicalPlan`
33+
//! - `planner_api` — APIs to manipulate logical and physical plans
34+
//! - `pruning` — APIs to manipulate logical and physical plans
35+
//! - `thread_pools` — demonstrate TrackConsumersPool for memory tracking and debugging with enhanced error messages and shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
36+
37+
mod analyzer_rule;
38+
mod expr_api;
39+
mod optimizer_rule;
40+
mod parse_sql_expr;
41+
mod plan_to_sql;
42+
mod planner_api;
43+
mod pruning;
44+
mod thread_pools;
45+
46+
use std::str::FromStr;
47+
48+
use datafusion::error::{DataFusionError, Result};
49+
50+
enum ExampleKind {
51+
AnalyzerRule,
52+
ExprApi,
53+
OptimizerRule,
54+
ParseSqlExpr,
55+
PlanToSql,
56+
PlannerApi,
57+
Pruning,
58+
ThreadPools,
59+
}
60+
61+
impl AsRef<str> for ExampleKind {
62+
fn as_ref(&self) -> &str {
63+
match self {
64+
Self::AnalyzerRule => "analyzer_rule",
65+
Self::ExprApi => "expr_api",
66+
Self::OptimizerRule => "optimizer_rule",
67+
Self::ParseSqlExpr => "parse_sql_expr",
68+
Self::PlanToSql => "plan_to_sql",
69+
Self::PlannerApi => "planner_api",
70+
Self::Pruning => "pruning",
71+
Self::ThreadPools => "thread_pools",
72+
}
73+
}
74+
}
75+
76+
impl FromStr for ExampleKind {
77+
type Err = DataFusionError;
78+
79+
fn from_str(s: &str) -> Result<Self> {
80+
match s {
81+
"analyzer_rule" => Ok(Self::AnalyzerRule),
82+
"expr_api" => Ok(Self::ExprApi),
83+
"optimizer_rule" => Ok(Self::OptimizerRule),
84+
"parse_sql_expr" => Ok(Self::ParseSqlExpr),
85+
"plan_to_sql" => Ok(Self::PlanToSql),
86+
"planner_api" => Ok(Self::PlannerApi),
87+
"pruning" => Ok(Self::Pruning),
88+
"thread_pools" => Ok(Self::ThreadPools),
89+
_ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))),
90+
}
91+
}
92+
}
93+
94+
impl ExampleKind {
95+
const ALL: [Self; 8] = [
96+
Self::AnalyzerRule,
97+
Self::ExprApi,
98+
Self::OptimizerRule,
99+
Self::ParseSqlExpr,
100+
Self::PlanToSql,
101+
Self::PlannerApi,
102+
Self::Pruning,
103+
Self::ThreadPools,
104+
];
105+
106+
const EXAMPLE_NAME: &str = "query_planning";
107+
108+
fn variants() -> Vec<&'static str> {
109+
Self::ALL.iter().map(|x| x.as_ref()).collect()
110+
}
111+
}
112+
113+
#[tokio::main]
114+
async fn main() -> Result<()> {
115+
let usage = format!(
116+
"Usage: cargo run --example {} -- [{}]",
117+
ExampleKind::EXAMPLE_NAME,
118+
ExampleKind::variants().join("|")
119+
);
120+
121+
let arg = std::env::args().nth(1).ok_or_else(|| {
122+
eprintln!("{usage}");
123+
DataFusionError::Execution("Missing argument".to_string())
124+
})?;
125+
126+
match arg.parse::<ExampleKind>()? {
127+
ExampleKind::AnalyzerRule => analyzer_rule::analyzer_rule().await?,
128+
ExampleKind::ExprApi => expr_api::expr_api().await?,
129+
ExampleKind::OptimizerRule => optimizer_rule::optimizer_rule().await?,
130+
ExampleKind::ParseSqlExpr => parse_sql_expr::parse_sql_expr().await?,
131+
ExampleKind::PlanToSql => plan_to_sql::plan_to_sql_examples().await?,
132+
ExampleKind::PlannerApi => planner_api::planner_api().await?,
133+
ExampleKind::Pruning => pruning::pruning().await?,
134+
ExampleKind::ThreadPools => thread_pools::thread_pools().await?,
135+
}
136+
137+
Ok(())
138+
}

datafusion-examples/examples/optimizer_rule.rs renamed to datafusion-examples/examples/query_planning/optimizer_rule.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ use std::sync::Arc;
3737
///
3838
/// See [analyzer_rule.rs] for an example of AnalyzerRules, which are for
3939
/// changing plan semantics.
40-
#[tokio::main]
41-
pub async fn main() -> Result<()> {
40+
pub async fn optimizer_rule() -> Result<()> {
4241
// DataFusion includes many built in OptimizerRules for tasks such as outer
4342
// to inner join conversion and constant folding.
4443
//

datafusion-examples/examples/parse_sql_expr.rs renamed to datafusion-examples/examples/query_planning/parse_sql_expr.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,15 @@ use datafusion::{
3232
/// The code in this example shows how to:
3333
///
3434
/// 1. [`simple_session_context_parse_sql_expr_demo`]: Parse a simple SQL text into a logical
35-
/// expression using a schema at [`SessionContext`].
35+
/// expression using a schema at [`SessionContext`].
3636
///
3737
/// 2. [`simple_dataframe_parse_sql_expr_demo`]: Parse a simple SQL text into a logical expression
38-
/// using a schema at [`DataFrame`].
38+
/// using a schema at [`DataFrame`].
3939
///
4040
/// 3. [`query_parquet_demo`]: Query a parquet file using the parsed_sql_expr from a DataFrame.
4141
///
4242
/// 4. [`round_trip_parse_sql_expr_demo`]: Parse a SQL text and convert it back to SQL using [`Unparser`].
43-
44-
#[tokio::main]
45-
async fn main() -> Result<()> {
43+
pub async fn parse_sql_expr() -> Result<()> {
4644
// See how to evaluate expressions
4745
simple_session_context_parse_sql_expr_demo()?;
4846
simple_dataframe_parse_sql_expr_demo().await?;

datafusion-examples/examples/plan_to_sql.rs renamed to datafusion-examples/examples/query_planning/plan_to_sql.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,28 +43,26 @@ use std::sync::Arc;
4343
/// The code in this example shows how to:
4444
///
4545
/// 1. [`simple_expr_to_sql_demo`]: Create a simple expression [`Exprs`] with
46-
/// fluent API and convert to sql suitable for passing to another database
46+
/// fluent API and convert to sql suitable for passing to another database
4747
///
4848
/// 2. [`simple_expr_to_pretty_sql_demo`] Create a simple expression
49-
/// [`Exprs`] with fluent API and convert to sql without extra parentheses,
50-
/// suitable for displaying to humans
49+
/// [`Exprs`] with fluent API and convert to sql without extra parentheses,
50+
/// suitable for displaying to humans
5151
///
5252
/// 3. [`simple_expr_to_sql_demo_escape_mysql_style`]" Create a simple
53-
/// expression [`Exprs`] with fluent API and convert to sql escaping column
54-
/// names in MySQL style.
53+
/// expression [`Exprs`] with fluent API and convert to sql escaping column
54+
/// names in MySQL style.
5555
///
5656
/// 4. [`simple_plan_to_sql_demo`]: Create a simple logical plan using the
57-
/// DataFrames API and convert to sql string.
57+
/// DataFrames API and convert to sql string.
5858
///
5959
/// 5. [`round_trip_plan_to_sql_demo`]: Create a logical plan from a SQL string, modify it using the
60-
/// DataFrames API and convert it back to a sql string.
60+
/// DataFrames API and convert it back to a sql string.
6161
///
6262
/// 6. [`unparse_my_logical_plan_as_statement`]: Create a custom logical plan and unparse it as a statement.
6363
///
6464
/// 7. [`unparse_my_logical_plan_as_subquery`]: Create a custom logical plan and unparse it as a subquery.
65-
66-
#[tokio::main]
67-
async fn main() -> Result<()> {
65+
pub async fn plan_to_sql_examples() -> Result<()> {
6866
// See how to evaluate expressions
6967
simple_expr_to_sql_demo()?;
7068
simple_expr_to_pretty_sql_demo()?;

datafusion-examples/examples/planner_api.rs renamed to datafusion-examples/examples/query_planning/planner_api.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ use datafusion::prelude::*;
3232
/// physical plan:
3333
/// - Via the combined `create_physical_plan` API.
3434
/// - Utilizing the analyzer, optimizer, and query planner APIs separately.
35-
#[tokio::main]
36-
async fn main() -> Result<()> {
35+
pub async fn planner_api() -> Result<()> {
3736
// Set up a DataFusion context and load a Parquet file
3837
let ctx = SessionContext::new();
3938
let testdata = datafusion::test_util::parquet_test_data();

0 commit comments

Comments
 (0)