From e8a3c0aa2a55243090b1ff8a2c7164e261c4baec Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Mon, 26 Jan 2026 19:44:18 +0800 Subject: [PATCH 1/3] Add debug log and test case Signed-off-by: JaySon-Huang --- .../KVStore/Decode/PartitionStreams.cpp | 12 ++++ .../KVStore/Decode/RegionBlockReader.cpp | 10 ++++ .../KVStore/MultiRaft/RaftCommands.cpp | 1 + .../tests/gtest_region_block_reader.cpp | 58 +++++++++++++++++++ dbms/src/Storages/StorageDeltaMerge.cpp | 8 +++ dbms/src/TiDB/Decode/RowCodec.cpp | 30 ++++++++++ 6 files changed, 119 insertions(+) diff --git a/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp b/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp index 4e47246b305..31f445fbe1f 100644 --- a/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp +++ b/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp @@ -159,6 +159,18 @@ static inline bool atomicReadWrite( std::tie(decoding_schema_snapshot, block_ptr) = storage->getSchemaSnapshotAndBlockForDecoding(lock, true, should_handle_version_col); block_decoding_schema_epoch = decoding_schema_snapshot->decoding_schema_epoch; + const auto & table_info = storage->getTableInfo(); + LOG_INFO( + Logger::get("dddddddddd"), + "decode with schema snapshot, keyspace={} table_id={} region_id={} epoch={} update_ts={} " + "with_version_column={} columns={}", + rw_ctx.keyspace_id, + rw_ctx.table_id, + region->id(), + block_decoding_schema_epoch, + table_info.update_timestamp, + should_handle_version_col, + table_info.columns.size()); auto reader = RegionBlockReader(decoding_schema_snapshot); if (!reader.read(*block_ptr, data_list_read, force_decode)) diff --git a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp index e31892dc9b9..6773b808205 100644 --- a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp +++ b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp @@ -44,6 +44,11 @@ bool RegionBlockReader::read(Block & block, const ReadList & data_list, bool for { try { + for (const auto & data : data_list) + { + LOG_INFO(Logger::get("dddddddddd"), "RegionBlockReader::read data.value={}", data.value->toDebugString()); + } + switch (schema_snapshot->pk_type) { case TMTPKType::INT64: @@ -177,6 +182,11 @@ struct VersionColResolver template bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool force_decode) { + LOG_INFO( + Logger::get("dddddddddd"), + "RegionBlockReader::readImpl start, schema_snapshot.column_defines={}", + *schema_snapshot->column_defines); + VersionColResolver version_col_resolver; version_col_resolver.check(block, schema_snapshot->column_defines->size()); const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap(); diff --git a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp index bfb9d2b06e7..e2e05ce25f5 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp +++ b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp @@ -368,6 +368,7 @@ std::pair Region::handleWriteRaftCmd( { auto tikv_key = TiKVKey(cmds.keys[i].data, cmds.keys[i].len); auto tikv_value = TiKVValue(cmds.vals[i].data, cmds.vals[i].len); + LOG_INFO(Logger::get("dddddddddd"), "Region::handleWriteRaftCmd Put key={}, value={}", tikv_key.toDebugString(), tikv_value.toDebugString()); if (cf == ColumnFamilyType::Write) { write_put_key_count++; diff --git a/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp index 2c6f67f67ce..bac5b914663 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp @@ -693,4 +693,62 @@ try CATCH +TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue) +try +{ + // With this table_info, c1 can be decoded to NULL value + // TableInfo table_info( + // R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})", + // NullspaceID); + + // With this table_info, c1 is filled with "0" according to ori_default + TableInfo table_info( + R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":711,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})", + NullspaceID); + + RegionID region_id = 4; + // the start_key and end_key for table_id = 68 + String region_start_key(bytesFromHexString("7480000000000002FF7C5F720000000000FA")); + String region_end_key(bytesFromHexString("7480000000000002FF7D00000000000000F8")); + auto region = RegionBench::makeRegionForRange(region_id, region_start_key, region_end_key); + // the hex kv dump from SSTFile + std::vector> kvs = { + { + "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA", + "50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339" + "1ABC85", + }, + { + "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8", + "50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339" + "1ABC85", + }, + { + "7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8", + "509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C", + }, + }; + for (const auto & [k, v] : kvs) + { + region->insertDebug("write", TiKVKey(bytesFromHexString(k)), TiKVValue(bytesFromHexString(v))); + } + + auto data_list_read = ReadRegionCommitCache(region, true); + ASSERT_TRUE(data_list_read.has_value()); + + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + { + // force_decode=false can not decode because there are + // missing value for column with primary key flag. + auto reader = RegionBlockReader(decoding_schema); + Block res_block = createBlockSortByColumnID(decoding_schema); + EXPECT_TRUE(reader.read(res_block, *data_list_read, false)); + LOG_INFO( + Logger::get(), + "Decoded block:\n{}", + DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName())); + } +} +CATCH + } // namespace DB::tests diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 14926b90e09..7ac3f543642 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -1254,6 +1254,14 @@ std::pair StorageDeltaMerg store->getHandle(), decoding_schema_epoch++, with_version_column); + LOG_INFO( + Logger::get("dddddddddd"), + "Refresh decoding schema snapshot, table_id={} epoch={} update_ts={} with_version_column={} table_info={}", + tidb_table_info.id, + decoding_schema_snapshot->decoding_schema_epoch, + tidb_table_info.update_timestamp, + with_version_column, + tidb_table_info.serialize()); cache_blocks.clear(); decoding_schema_changed = false; } diff --git a/dbms/src/TiDB/Decode/RowCodec.cpp b/dbms/src/TiDB/Decode/RowCodec.cpp index 79ffc2dba99..3cfaa4af691 100644 --- a/dbms/src/TiDB/Decode/RowCodec.cpp +++ b/dbms/src/TiDB/Decode/RowCodec.cpp @@ -20,6 +20,8 @@ #include #include +#include "Common/FieldVisitors.h" + namespace DB { @@ -459,6 +461,12 @@ bool appendRowV2ToBlockImpl( raw_value, num_not_null_columns, value_offsets); + LOG_INFO( + Logger::get("dddddddddd"), + "not_null_column_ids={} null_column_ids={}", + not_null_column_ids, + null_column_ids); + size_t values_start_pos = cursor; size_t idx_not_null = 0; size_t idx_null = 0; @@ -499,6 +507,18 @@ bool appendRowV2ToBlockImpl( // a column. // Fill with default value and continue to read data for next column id. const auto & column_info = column_infos[column_ids_iter->second]; + LOG_INFO( + Logger::get("dddddddddd"), + "appendRowV2ToBlockImpl: fill default value for missing column," + " next_column_id={} next_datum_column_id={} block_column_pos={}" + " column_info={{name={} id={} not_null={} default_value={}}}", + next_column_id, + next_datum_column_id, + block_column_pos, + column_info.name, + column_info.id, + column_info.hasNotNullFlag(), + applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); if (!addDefaultValueToColumnIfPossible( column_info, block, @@ -575,6 +595,16 @@ bool appendRowV2ToBlockImpl( if (column_ids_iter->first != pk_handle_id) { const auto & column_info = column_infos[column_ids_iter->second]; + LOG_INFO( + Logger::get("dddddddddd"), + "appendRowV2ToBlockImpl: fill default value for missing column," + " block_column_pos={}" + " column_info={{name={} id={} not_null={} default_value={}}}", + block_column_pos, + column_info.name, + column_info.id, + column_info.hasNotNullFlag(), + applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); if (!addDefaultValueToColumnIfPossible( column_info, block, From 96ceb7fce9108bdd77fe4d657c0b4477fa818175 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 27 Jan 2026 16:25:56 +0800 Subject: [PATCH 2/3] Try fix default value filling logic Signed-off-by: JaySon-Huang --- dbms/src/Core/Block.cpp | 17 +++-- .../KVStore/Decode/RegionBlockReader.cpp | 9 ++- .../KVStore/MultiRaft/RaftCommands.cpp | 6 +- .../tests/gtest_region_block_reader.cpp | 62 +++++++++++++------ dbms/src/TiDB/Decode/RowCodec.cpp | 35 +++++++++-- 5 files changed, 91 insertions(+), 38 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 8b15f47cc06..ae0d6e34e3c 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -256,15 +256,14 @@ void Block::checkNumberOfRows() const { auto first_col = data.front(); throw Exception( - fmt::format( - "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}", - first_col.name, - first_col.column_id, - rows, - elem.name, - elem.column_id, - size), - ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, + "Sizes of columns doesn't match: {}(id={}): rows={}, {}(id={}): rows={}", + first_col.name, + first_col.column_id, + rows, + elem.name, + elem.column_id, + size); } } } diff --git a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp index 6773b808205..c9c52338813 100644 --- a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp +++ b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp @@ -189,6 +189,7 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool VersionColResolver version_col_resolver; version_col_resolver.check(block, schema_snapshot->column_defines->size()); + // The column_ids to read according to schema_snapshot, each elem is (column_id, block_pos) const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap(); const auto & pk_column_ids = schema_snapshot->pk_column_ids; const auto & pk_pos_map = schema_snapshot->pk_pos_map; @@ -204,12 +205,12 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool /// extra handle, del, version column is with column id smaller than other visible column id, /// so they must exists before all other columns, and we can get them before decoding other columns ColumnUInt8 * raw_delmark_col = nullptr; - const size_t invalid_column_pos = std::numeric_limits::max(); + const static size_t INVALID_COLUMN_POS = std::numeric_limits::max(); // we cannot figure out extra_handle's column type now, so we just remember it's pos here - size_t extra_handle_column_pos = invalid_column_pos; + size_t extra_handle_column_pos = INVALID_COLUMN_POS; while (raw_delmark_col == nullptr || version_col_resolver.needBuild() - || extra_handle_column_pos == invalid_column_pos) + || extra_handle_column_pos == INVALID_COLUMN_POS) { if (column_ids_iter->first == MutSup::delmark_col_id) { @@ -279,6 +280,8 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool else { // Parse column value from encoded value + // Decode the column_ids from `column_ids_iter` to `read_column_ids.end()` + // and insert into `block` at position starting from `next_column_pos` if (!appendRowToBlock( *value_ptr, column_ids_iter, diff --git a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp index e2e05ce25f5..b2b55c623bd 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp +++ b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp @@ -368,7 +368,11 @@ std::pair Region::handleWriteRaftCmd( { auto tikv_key = TiKVKey(cmds.keys[i].data, cmds.keys[i].len); auto tikv_value = TiKVValue(cmds.vals[i].data, cmds.vals[i].len); - LOG_INFO(Logger::get("dddddddddd"), "Region::handleWriteRaftCmd Put key={}, value={}", tikv_key.toDebugString(), tikv_value.toDebugString()); + LOG_INFO( + Logger::get("dddddddddd"), + "Region::handleWriteRaftCmd Put key={}, value={}", + tikv_key.toDebugString(), + tikv_value.toDebugString()); if (cf == ColumnFamilyType::Write) { write_put_key_count++; diff --git a/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp index bac5b914663..e372fb0260a 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp @@ -354,7 +354,9 @@ TEST_F(RegionBlockReaderTest, MissingColumnRowV2) encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto new_table_info = getTableInfoWithMoreColumns({MutSup::extra_handle_id}, false); auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); - ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); + // `decodeAndCheckColumns` will return false because "column1" not_null=true but no_default_value=false + // FIXME: Re check the logic here + ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); } TEST_F(RegionBlockReaderTest, MissingColumnRowV1) @@ -363,7 +365,9 @@ TEST_F(RegionBlockReaderTest, MissingColumnRowV1) encodeColumns(table_info, fields, RowEncodeVersion::RowV1); auto new_table_info = getTableInfoWithMoreColumns({MutSup::extra_handle_id}, false); auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); - ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); + // `decodeAndCheckColumns` will return false because "column1" not_null=true but no_default_value=false + // FIXME: Re check the logic here + ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); } TEST_F(RegionBlockReaderTest, ExtraColumnRowV2) @@ -696,11 +700,6 @@ CATCH TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue) try { - // With this table_info, c1 can be decoded to NULL value - // TableInfo table_info( - // R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})", - // NullspaceID); - // With this table_info, c1 is filled with "0" according to ori_default TableInfo table_info( R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":711,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})", @@ -713,16 +712,16 @@ try auto region = RegionBench::makeRegionForRange(region_id, region_start_key, region_end_key); // the hex kv dump from SSTFile std::vector> kvs = { - { - "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA", - "50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339" - "1ABC85", - }, - { - "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8", - "50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339" - "1ABC85", - }, + // { + // "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA", + // "50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339" + // "1ABC85", + // }, + // { + // "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8", + // "50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339" + // "1ABC85", + // }, { "7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8", "509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C", @@ -739,10 +738,35 @@ try auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); { // force_decode=false can not decode because there are - // missing value for column with primary key flag. + // missing value for column with not null flag. + auto reader = RegionBlockReader(decoding_schema); + Block res_block = createBlockSortByColumnID(decoding_schema); + ASSERT_FALSE(reader.read(res_block, *data_list_read, false)); + } + { + // force_decode=true can decode the block, and filling the default value for c1 + auto reader = RegionBlockReader(decoding_schema); + Block res_block = createBlockSortByColumnID(decoding_schema); + ASSERT_TRUE(reader.read(res_block, *data_list_read, true)); + // TODO: verify the default value is filled correctly + LOG_INFO( + Logger::get(), + "Decoded block:\n{}", + DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName())); + } + + // With this table_info, c1 does not have the "not null" flag + TableInfo table_info_c1_nullable( + R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})", + NullspaceID); + + decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_nullable); + { + // force_decode=false should be able to decode because c1 is nullable auto reader = RegionBlockReader(decoding_schema); Block res_block = createBlockSortByColumnID(decoding_schema); - EXPECT_TRUE(reader.read(res_block, *data_list_read, false)); + ASSERT_TRUE(reader.read(res_block, *data_list_read, false)); + // TODO: verify the default value is filled correctly LOG_INFO( Logger::get(), "Decoded block:\n{}", diff --git a/dbms/src/TiDB/Decode/RowCodec.cpp b/dbms/src/TiDB/Decode/RowCodec.cpp index 3cfaa4af691..0f1e41a7f43 100644 --- a/dbms/src/TiDB/Decode/RowCodec.cpp +++ b/dbms/src/TiDB/Decode/RowCodec.cpp @@ -414,10 +414,15 @@ inline bool addDefaultValueToColumnIfPossible( // fallthrough to fill default value when force_decode } - if (column_info.hasNoDefaultValueFlag() && column_info.hasNotNullFlag()) + if (column_info.hasNotNullFlag()) { if (!force_decode) + { + // This is a Column that does not have encoded datum in the value, but it is defined as NOT NULL. + // It could be a row encoded by newer schema after turning `NOT NULL` to `NULLABLE`. + // Return false to trigger schema sync when `force_decode==false`. return false; + } // Else the row does not contain this "not null" / "no default value" column, // it could be a row encoded by older schema. // fallthrough to fill default value when force_decode @@ -468,7 +473,9 @@ bool appendRowV2ToBlockImpl( null_column_ids); size_t values_start_pos = cursor; + // how many not null columns have been processed size_t idx_not_null = 0; + // how many null columns have been processed size_t idx_null = 0; // Merge ordered not null/null columns to keep order. while (idx_not_null < not_null_column_ids.size() || idx_null < null_column_ids.size()) @@ -487,14 +494,21 @@ bool appendRowV2ToBlockImpl( auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null]; const auto next_column_id = column_ids_iter->first; + LOG_INFO( + Logger::get("dddddddddd"), + "next_column_id={} next_datum_column_id={} force_decode={}", + next_column_id, + next_datum_column_id, + force_decode); if (next_column_id > next_datum_column_id) { - // The next column id to read is bigger than the column id of next datum in encoded row. + // The next_column_id to read is bigger than the next_datum_column_id in encoded row. // It means this is the datum of extra column. May happen when reading after dropping // a column. + // For `force_decode == false`, we should return false to let upper layer trigger schema sync. if (!force_decode) return false; - // Ignore the extra column and continue to parse other datum + // For `force_decode == true`, we just skip this extra column and continue to parse other datum. if (is_null) idx_null++; else @@ -502,7 +516,7 @@ bool appendRowV2ToBlockImpl( } else if (next_column_id < next_datum_column_id) { - // The next column id to read is less than the column id of next datum in encoded row. + // The next_column_id to read is less than the next_datum_column_id in encoded row. // It means this is the datum of missing column. May happen when reading after adding // a column. // Fill with default value and continue to read data for next column id. @@ -511,13 +525,14 @@ bool appendRowV2ToBlockImpl( Logger::get("dddddddddd"), "appendRowV2ToBlockImpl: fill default value for missing column," " next_column_id={} next_datum_column_id={} block_column_pos={}" - " column_info={{name={} id={} not_null={} default_value={}}}", + " column_info={{name={} id={} not_null={} no_default_val={} default_value={}}}", next_column_id, next_datum_column_id, block_column_pos, column_info.name, column_info.id, column_info.hasNotNullFlag(), + column_info.hasNoDefaultValueFlag(), applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); if (!addDefaultValueToColumnIfPossible( column_info, @@ -525,7 +540,9 @@ bool appendRowV2ToBlockImpl( block_column_pos, ignore_pk_if_absent, force_decode)) + { return false; + } column_ids_iter++; block_column_pos++; } @@ -590,8 +607,11 @@ bool appendRowV2ToBlockImpl( block_column_pos++; } } + // There are more columns to read other than the datum encoded in the row. while (column_ids_iter != column_ids_iter_end) { + // Skip if the column is the same as `pk_handle_id`. The value of column + // `pk_handle_id` will be filled in upper layer but not in this function. if (column_ids_iter->first != pk_handle_id) { const auto & column_info = column_infos[column_ids_iter->second]; @@ -599,11 +619,12 @@ bool appendRowV2ToBlockImpl( Logger::get("dddddddddd"), "appendRowV2ToBlockImpl: fill default value for missing column," " block_column_pos={}" - " column_info={{name={} id={} not_null={} default_value={}}}", + " column_info={{name={} id={} not_null={} no_default_val={} default_value={}}}", block_column_pos, column_info.name, column_info.id, column_info.hasNotNullFlag(), + column_info.hasNoDefaultValueFlag(), applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); if (!addDefaultValueToColumnIfPossible( column_info, @@ -611,7 +632,9 @@ bool appendRowV2ToBlockImpl( block_column_pos, ignore_pk_if_absent, force_decode)) + { return false; + } } column_ids_iter++; block_column_pos++; From f0f7d8145e4c2ca0f0d6edc20bff679e41bbdcb4 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 27 Jan 2026 16:59:55 +0800 Subject: [PATCH 3/3] Disable debugging log Signed-off-by: JaySon-Huang --- dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp | 2 ++ dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp | 4 ++++ dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp | 2 ++ dbms/src/Storages/StorageDeltaMerge.cpp | 2 ++ dbms/src/TiDB/Decode/RowCodec.cpp | 8 ++++++++ 5 files changed, 18 insertions(+) diff --git a/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp b/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp index 31f445fbe1f..a30600d5818 100644 --- a/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp +++ b/dbms/src/Storages/KVStore/Decode/PartitionStreams.cpp @@ -159,6 +159,7 @@ static inline bool atomicReadWrite( std::tie(decoding_schema_snapshot, block_ptr) = storage->getSchemaSnapshotAndBlockForDecoding(lock, true, should_handle_version_col); block_decoding_schema_epoch = decoding_schema_snapshot->decoding_schema_epoch; +#if 0 const auto & table_info = storage->getTableInfo(); LOG_INFO( Logger::get("dddddddddd"), @@ -171,6 +172,7 @@ static inline bool atomicReadWrite( table_info.update_timestamp, should_handle_version_col, table_info.columns.size()); +#endif auto reader = RegionBlockReader(decoding_schema_snapshot); if (!reader.read(*block_ptr, data_list_read, force_decode)) diff --git a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp index c9c52338813..668459b9d16 100644 --- a/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp +++ b/dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp @@ -44,10 +44,12 @@ bool RegionBlockReader::read(Block & block, const ReadList & data_list, bool for { try { +#if 0 for (const auto & data : data_list) { LOG_INFO(Logger::get("dddddddddd"), "RegionBlockReader::read data.value={}", data.value->toDebugString()); } +#endif switch (schema_snapshot->pk_type) { @@ -182,10 +184,12 @@ struct VersionColResolver template bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool force_decode) { +#if 0 LOG_INFO( Logger::get("dddddddddd"), "RegionBlockReader::readImpl start, schema_snapshot.column_defines={}", *schema_snapshot->column_defines); +#endif VersionColResolver version_col_resolver; version_col_resolver.check(block, schema_snapshot->column_defines->size()); diff --git a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp index b2b55c623bd..b9f3f33255b 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp +++ b/dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp @@ -368,11 +368,13 @@ std::pair Region::handleWriteRaftCmd( { auto tikv_key = TiKVKey(cmds.keys[i].data, cmds.keys[i].len); auto tikv_value = TiKVValue(cmds.vals[i].data, cmds.vals[i].len); +#if 0 LOG_INFO( Logger::get("dddddddddd"), "Region::handleWriteRaftCmd Put key={}, value={}", tikv_key.toDebugString(), tikv_value.toDebugString()); +#endif if (cf == ColumnFamilyType::Write) { write_put_key_count++; diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 7ac3f543642..fb704951370 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -1254,6 +1254,7 @@ std::pair StorageDeltaMerg store->getHandle(), decoding_schema_epoch++, with_version_column); +#if 0 LOG_INFO( Logger::get("dddddddddd"), "Refresh decoding schema snapshot, table_id={} epoch={} update_ts={} with_version_column={} table_info={}", @@ -1262,6 +1263,7 @@ std::pair StorageDeltaMerg tidb_table_info.update_timestamp, with_version_column, tidb_table_info.serialize()); +#endif cache_blocks.clear(); decoding_schema_changed = false; } diff --git a/dbms/src/TiDB/Decode/RowCodec.cpp b/dbms/src/TiDB/Decode/RowCodec.cpp index 0f1e41a7f43..c0bac97468c 100644 --- a/dbms/src/TiDB/Decode/RowCodec.cpp +++ b/dbms/src/TiDB/Decode/RowCodec.cpp @@ -466,11 +466,13 @@ bool appendRowV2ToBlockImpl( raw_value, num_not_null_columns, value_offsets); +#if 0 LOG_INFO( Logger::get("dddddddddd"), "not_null_column_ids={} null_column_ids={}", not_null_column_ids, null_column_ids); +#endif size_t values_start_pos = cursor; // how many not null columns have been processed @@ -494,12 +496,14 @@ bool appendRowV2ToBlockImpl( auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null]; const auto next_column_id = column_ids_iter->first; +#if 0 LOG_INFO( Logger::get("dddddddddd"), "next_column_id={} next_datum_column_id={} force_decode={}", next_column_id, next_datum_column_id, force_decode); +#endif if (next_column_id > next_datum_column_id) { // The next_column_id to read is bigger than the next_datum_column_id in encoded row. @@ -521,6 +525,7 @@ bool appendRowV2ToBlockImpl( // a column. // Fill with default value and continue to read data for next column id. const auto & column_info = column_infos[column_ids_iter->second]; +#if 0 LOG_INFO( Logger::get("dddddddddd"), "appendRowV2ToBlockImpl: fill default value for missing column," @@ -534,6 +539,7 @@ bool appendRowV2ToBlockImpl( column_info.hasNotNullFlag(), column_info.hasNoDefaultValueFlag(), applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); +#endif if (!addDefaultValueToColumnIfPossible( column_info, block, @@ -615,6 +621,7 @@ bool appendRowV2ToBlockImpl( if (column_ids_iter->first != pk_handle_id) { const auto & column_info = column_infos[column_ids_iter->second]; +#if 0 LOG_INFO( Logger::get("dddddddddd"), "appendRowV2ToBlockImpl: fill default value for missing column," @@ -626,6 +633,7 @@ bool appendRowV2ToBlockImpl( column_info.hasNotNullFlag(), column_info.hasNoDefaultValueFlag(), applyVisitor(FieldVisitorToString(), column_info.defaultValueToField())); +#endif if (!addDefaultValueToColumnIfPossible( column_info, block,