From 3c47dec15c7f18b01e1f112abf7c5053d9ef2f6b Mon Sep 17 00:00:00 2001 From: evtrouble Date: Wed, 13 Aug 2025 10:17:23 +0800 Subject: [PATCH 1/3] Process the part marked as TODO remove me in SessionStage --- src/observer/net/sql_task_handler.cpp | 14 ++++- src/observer/session/session_stage.cpp | 71 +------------------------- src/observer/session/session_stage.h | 17 +----- 3 files changed, 14 insertions(+), 88 deletions(-) diff --git a/src/observer/net/sql_task_handler.cpp b/src/observer/net/sql_task_handler.cpp index 90f0609b4..148b87edf 100644 --- a/src/observer/net/sql_task_handler.cpp +++ b/src/observer/net/sql_task_handler.cpp @@ -21,7 +21,7 @@ See the Mulan PSL v2 for more details. */ RC SqlTaskHandler::handle_event(Communicator *communicator) { SessionEvent *event = nullptr; - RC rc = communicator->read_event(event); + RC rc = communicator->read_event(event); if (OB_FAIL(rc)) { return rc; } @@ -30,7 +30,7 @@ RC SqlTaskHandler::handle_event(Communicator *communicator) return RC::SUCCESS; } - session_stage_.handle_request2(event); + session_stage_.handle_request(event); SQLStageEvent sql_event(event, event->query()); @@ -55,6 +55,16 @@ RC SqlTaskHandler::handle_event(Communicator *communicator) return RC::SUCCESS; } +/** + * 处理一个SQL语句经历这几个阶段。 + * 虽然看起来流程比较多,但是对于大多数SQL来说,更多的可以关注parse和executor阶段。 + * 通常只有select、delete等带有查询条件的语句才需要进入optimize。 + * 对于DDL语句,比如create table、create index等,没有对应的查询计划,可以直接搜索 + * create_table_executor、create_index_executor来看具体的执行代码。 + * select、delete等DML语句,会产生一些执行计划,如果感觉繁琐,可以跳过optimize直接看 + * execute_stage中的执行,通过explain语句看需要哪些operator,然后找对应的operator来 + * 调试或者看代码执行过程即可。 + */ RC SqlTaskHandler::handle_sql(SQLStageEvent *sql_event) { RC rc = query_cache_stage_.handle_request(sql_event); diff --git a/src/observer/session/session_stage.cpp b/src/observer/session/session_stage.cpp index eed271357..92bda77c7 100644 --- a/src/observer/session/session_stage.cpp +++ b/src/observer/session/session_stage.cpp @@ -31,31 +31,7 @@ using namespace common; // Destructor SessionStage::~SessionStage() {} -// TODO remove me -void SessionStage::handle_request(SessionEvent *sev) -{ - string sql = sev->query(); - if (common::is_blank(sql.c_str())) { - return; - } - - Session::set_current_session(sev->session()); - sev->session()->set_current_request(sev); - SQLStageEvent sql_event(sev, sql); - (void)handle_sql(&sql_event); - - Communicator *communicator = sev->get_communicator(); - bool need_disconnect = false; - RC rc = communicator->write_result(sev, need_disconnect); - LOG_INFO("write result return %s", strrc(rc)); - if (need_disconnect) { - // do nothing - } - sev->session()->set_current_request(nullptr); - Session::set_current_session(nullptr); -} - -void SessionStage::handle_request2(SessionEvent *event) +void SessionStage::handle_request(SessionEvent *event) { const string &sql = event->query(); if (common::is_blank(sql.c_str())) { @@ -66,48 +42,3 @@ void SessionStage::handle_request2(SessionEvent *event) event->session()->set_current_request(event); SQLStageEvent sql_event(event, sql); } - -/** - * 处理一个SQL语句经历这几个阶段。 - * 虽然看起来流程比较多,但是对于大多数SQL来说,更多的可以关注parse和executor阶段。 - * 通常只有select、delete等带有查询条件的语句才需要进入optimize。 - * 对于DDL语句,比如create table、create index等,没有对应的查询计划,可以直接搜索 - * create_table_executor、create_index_executor来看具体的执行代码。 - * select、delete等DML语句,会产生一些执行计划,如果感觉繁琐,可以跳过optimize直接看 - * execute_stage中的执行,通过explain语句看需要哪些operator,然后找对应的operator来 - * 调试或者看代码执行过程即可。 - */ -RC SessionStage::handle_sql(SQLStageEvent *sql_event) -{ - RC rc = query_cache_stage_.handle_request(sql_event); - if (OB_FAIL(rc)) { - LOG_TRACE("failed to do query cache. rc=%s", strrc(rc)); - return rc; - } - - rc = parse_stage_.handle_request(sql_event); - if (OB_FAIL(rc)) { - LOG_TRACE("failed to do parse. rc=%s", strrc(rc)); - return rc; - } - - rc = resolve_stage_.handle_request(sql_event); - if (OB_FAIL(rc)) { - LOG_TRACE("failed to do resolve. rc=%s", strrc(rc)); - return rc; - } - - rc = optimize_stage_.handle_request(sql_event); - if (rc != RC::UNIMPLEMENTED && rc != RC::SUCCESS) { - LOG_TRACE("failed to do optimize. rc=%s", strrc(rc)); - return rc; - } - - rc = execute_stage_.handle_request(sql_event); - if (OB_FAIL(rc)) { - LOG_TRACE("failed to do execute. rc=%s", strrc(rc)); - return rc; - } - - return rc; -} diff --git a/src/observer/session/session_stage.h b/src/observer/session/session_stage.h index 2762c66ca..3575eddeb 100644 --- a/src/observer/session/session_stage.h +++ b/src/observer/session/session_stage.h @@ -14,11 +14,7 @@ See the Mulan PSL v2 for more details. */ #pragma once -#include "sql/executor/execute_stage.h" -#include "sql/optimizer/optimize_stage.h" -#include "sql/parser/parse_stage.h" -#include "sql/parser/resolve_stage.h" -#include "sql/query_cache/query_cache_stage.h" +class SessionEvent; /** * @brief SEDA处理的stage @@ -42,17 +38,6 @@ class SessionStage SessionStage() = default; virtual ~SessionStage(); -public: - void handle_request2(SessionEvent *event); - public: void handle_request(SessionEvent *event); - RC handle_sql(SQLStageEvent *sql_event); - -private: - QueryCacheStage query_cache_stage_; - ParseStage parse_stage_; - ResolveStage resolve_stage_; - OptimizeStage optimize_stage_; - ExecuteStage execute_stage_; }; From 2a74a61acfe0ed38747620017efbf802d42144e1 Mon Sep 17 00:00:00 2001 From: evtrouble Date: Fri, 15 Aug 2025 10:13:20 +0800 Subject: [PATCH 2/3] remove gen_parser in tasks.json --- .vscode/tasks.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index c64205fa5..fe5dc73f5 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -22,11 +22,6 @@ "label": "build_release", "type": "shell", "command": "bash build.sh release" - }, - { - "label": "gen_parser", - "type": "shell", - "command": "cd ${workspaceFolder}/src/observer/sql/parser && bash gen_parser.sh" } ] } From debb1ee1cf60108fdcb383e9598197b94b528ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B5=85=E9=9A=8F?= Date: Fri, 17 Oct 2025 16:50:25 +0800 Subject: [PATCH 3/3] Remove duplicate NestedLoopJoinPhysicalOperator --- docs/docs/db_course_lab/lab2.md | 2 +- .../sql/operator/join_physical_operator.cpp | 133 ------------------ .../sql/operator/join_physical_operator.h | 53 ------- .../nested_loop_join_physical_operator.cpp | 32 ++--- 4 files changed, 17 insertions(+), 203 deletions(-) delete mode 100644 src/observer/sql/operator/join_physical_operator.cpp delete mode 100644 src/observer/sql/operator/join_physical_operator.h diff --git a/docs/docs/db_course_lab/lab2.md b/docs/docs/db_course_lab/lab2.md index 91e90aac8..8031e7ccb 100644 --- a/docs/docs/db_course_lab/lab2.md +++ b/docs/docs/db_course_lab/lab2.md @@ -77,7 +77,7 @@ SELECT * FROM tbl1 #### 背景介绍 连接(Join)操作被用来将两个或多个关系表的数据组合起来。 -Nested Loop Join(NLJ)算法通过双层循环来输出结果,其中左表为外循环,右表为内循环。目前 MiniOB 中已经实现了 Nested Loop Join 算子(相关实现位于 `src/observer/sql/operator/join_physical_operator.h`)。 +Nested Loop Join(NLJ)算法通过双层循环来输出结果,其中左表为外循环,右表为内循环。目前 MiniOB 中已经实现了 Nested Loop Join 算子(相关实现位于 `src/observer/sql/operator/nested_loop_join_physical_operator.h`)。 基于哈希的连接(Hash Join)算法其执行过程分为两个阶段:构建阶段和探测阶段。哈希连接在两个关系表上执行,假设这两个关系表分别为 R 表和 S 表。在构建阶段,会遍历其中一个关系表(通常是基数较小的表,如图中的 R 表),以参与连接的属性列为键在一个哈希表中存储。在探测阶段,会遍历另一个关系表 S 的所有记录,以参与连接的属性列为键在哈希表中探测,当探测到具有相同键的记录则将结果输出。 ![hashjoin](images/hashjoin.png) diff --git a/src/observer/sql/operator/join_physical_operator.cpp b/src/observer/sql/operator/join_physical_operator.cpp deleted file mode 100644 index 0de387301..000000000 --- a/src/observer/sql/operator/join_physical_operator.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. -miniob is licensed under Mulan PSL v2. -You can use this software according to the terms and conditions of the Mulan PSL v2. -You may obtain a copy of Mulan PSL v2 at: - http://license.coscl.org.cn/MulanPSL2 -THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, -EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -See the Mulan PSL v2 for more details. */ - -// -// Created by WangYunlai on 2022/12/30. -// - -#include "sql/operator/join_physical_operator.h" - -NestedLoopJoinPhysicalOperator::NestedLoopJoinPhysicalOperator() {} - -RC NestedLoopJoinPhysicalOperator::open(Trx *trx) -{ - if (children_.size() != 2) { - LOG_WARN("nlj operator should have 2 children"); - return RC::INTERNAL; - } - - RC rc = RC::SUCCESS; - left_ = children_[0].get(); - right_ = children_[1].get(); - right_closed_ = true; - round_done_ = true; - - rc = left_->open(trx); - trx_ = trx; - return rc; -} - -RC NestedLoopJoinPhysicalOperator::next() -{ - bool left_need_step = (left_tuple_ == nullptr); - RC rc = RC::SUCCESS; - if (round_done_) { - left_need_step = true; - } else { - rc = right_next(); - if (rc != RC::SUCCESS) { - if (rc == RC::RECORD_EOF) { - left_need_step = true; - } else { - return rc; - } - } else { - return rc; // got one tuple from right - } - } - - if (left_need_step) { - rc = left_next(); - if (rc != RC::SUCCESS) { - return rc; - } - } - - rc = right_next(); - return rc; -} - -RC NestedLoopJoinPhysicalOperator::close() -{ - RC rc = left_->close(); - if (rc != RC::SUCCESS) { - LOG_WARN("failed to close left oper. rc=%s", strrc(rc)); - } - - if (!right_closed_) { - rc = right_->close(); - if (rc != RC::SUCCESS) { - LOG_WARN("failed to close right oper. rc=%s", strrc(rc)); - } else { - right_closed_ = true; - } - } - return rc; -} - -Tuple *NestedLoopJoinPhysicalOperator::current_tuple() { return &joined_tuple_; } - -RC NestedLoopJoinPhysicalOperator::left_next() -{ - RC rc = RC::SUCCESS; - rc = left_->next(); - if (rc != RC::SUCCESS) { - return rc; - } - - left_tuple_ = left_->current_tuple(); - joined_tuple_.set_left(left_tuple_); - return rc; -} - -RC NestedLoopJoinPhysicalOperator::right_next() -{ - RC rc = RC::SUCCESS; - if (round_done_) { - if (!right_closed_) { - rc = right_->close(); - - right_closed_ = true; - if (rc != RC::SUCCESS) { - return rc; - } - } - - rc = right_->open(trx_); - if (rc != RC::SUCCESS) { - return rc; - } - right_closed_ = false; - - round_done_ = false; - } - - rc = right_->next(); - if (rc != RC::SUCCESS) { - if (rc == RC::RECORD_EOF) { - round_done_ = true; - } - return rc; - } - - right_tuple_ = right_->current_tuple(); - joined_tuple_.set_right(right_tuple_); - return rc; -} diff --git a/src/observer/sql/operator/join_physical_operator.h b/src/observer/sql/operator/join_physical_operator.h deleted file mode 100644 index 930d5f194..000000000 --- a/src/observer/sql/operator/join_physical_operator.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. -miniob is licensed under Mulan PSL v2. -You can use this software according to the terms and conditions of the Mulan PSL v2. -You may obtain a copy of Mulan PSL v2 at: - http://license.coscl.org.cn/MulanPSL2 -THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, -EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -See the Mulan PSL v2 for more details. */ - -// -// Created by WangYunlai on 2021/6/10. -// - -#pragma once - -#include "sql/operator/physical_operator.h" -#include "sql/parser/parse.h" - -/** - * @brief 最简单的两表(称为左表、右表)join算子 - * @details 依次遍历左表的每一行,然后关联右表的每一行 - * @ingroup PhysicalOperator - */ -class NestedLoopJoinPhysicalOperator : public PhysicalOperator -{ -public: - NestedLoopJoinPhysicalOperator(); - virtual ~NestedLoopJoinPhysicalOperator() = default; - - PhysicalOperatorType type() const override { return PhysicalOperatorType::NESTED_LOOP_JOIN; } - - RC open(Trx *trx) override; - RC next() override; - RC close() override; - Tuple *current_tuple() override; - -private: - RC left_next(); //! 左表遍历下一条数据 - RC right_next(); //! 右表遍历下一条数据,如果上一轮结束了就重新开始新的一轮 - -private: - Trx *trx_ = nullptr; - - //! 左表右表的真实对象是在PhysicalOperator::children_中,这里是为了写的时候更简单 - PhysicalOperator *left_ = nullptr; - PhysicalOperator *right_ = nullptr; - Tuple *left_tuple_ = nullptr; - Tuple *right_tuple_ = nullptr; - JoinedTuple joined_tuple_; //! 当前关联的左右两个tuple - bool round_done_ = true; //! 右表遍历的一轮是否结束 - bool right_closed_ = true; //! 右表算子是否已经关闭 -}; diff --git a/src/observer/sql/operator/nested_loop_join_physical_operator.cpp b/src/observer/sql/operator/nested_loop_join_physical_operator.cpp index 5228d23fd..ae6f5581b 100644 --- a/src/observer/sql/operator/nested_loop_join_physical_operator.cpp +++ b/src/observer/sql/operator/nested_loop_join_physical_operator.cpp @@ -36,31 +36,31 @@ RC NestedLoopJoinPhysicalOperator::open(Trx *trx) RC NestedLoopJoinPhysicalOperator::next() { + bool left_need_step = (left_tuple_ == nullptr); RC rc = RC::SUCCESS; - while (RC::SUCCESS == rc) { - bool left_need_step = (left_tuple_ == nullptr); - if (round_done_) { - left_need_step = true; - } - - if (left_need_step) { - rc = left_next(); - if (rc != RC::SUCCESS) { - return rc; - } - } - + if (round_done_) { + left_need_step = true; + } else { rc = right_next(); if (rc != RC::SUCCESS) { if (rc == RC::RECORD_EOF) { - rc = RC::SUCCESS; - round_done_ = true; - continue; + left_need_step = true; } else { return rc; } + } else { + return rc; // got one tuple from right } } + + if (left_need_step) { + rc = left_next(); + if (rc != RC::SUCCESS) { + return rc; + } + } + + rc = right_next(); return rc; }