diff --git a/.gitignore b/.gitignore index 607453a5d..6b6743c10 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,10 @@ pcre/pcre_stringpiece.h pcre/pcrecpparg.h pcre/stamp-h1 pcre/test-driver +build/ +docs/ +Doxyfile +data/ +CMakeFiles/ +CMakeCache.txt +cmake_install.cmake \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 7757916d2..9a9032481 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,7 +477,7 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -add_subdirectory(util) +# add_subdirectory(util) add_subdirectory(doc/dev-reference) if (NOT WIN32) @@ -496,12 +496,12 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS) set(BUILD_CHIMERA TRUE) endif() -add_subdirectory(unit) +# add_subdirectory(unit) if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) - add_subdirectory(tools) + # add_subdirectory(tools) endif() if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) - add_subdirectory(chimera) + # add_subdirectory(chimera) endif() endif() @@ -548,12 +548,12 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS) set(BUILD_CHIMERA TRUE) endif() -add_subdirectory(unit) +# add_subdirectory(unit) if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) - add_subdirectory(tools) + # add_subdirectory(tools) endif() if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) - add_subdirectory(chimera) + # add_subdirectory(chimera) endif() endif() @@ -1424,5 +1424,6 @@ endif () option(BUILD_EXAMPLES "Build Hyperscan example code (default TRUE)" TRUE) if(NOT WIN32 AND BUILD_EXAMPLES) - add_subdirectory(examples) + # add_subdirectory(examples) + add_subdirectory(mytest) endif() diff --git a/mytest/CMakeLists.txt b/mytest/CMakeLists.txt new file mode 100644 index 000000000..1f51e0c22 --- /dev/null +++ b/mytest/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(mytest test1.c) +set_source_files_properties(test1.c PROPERTIES COMPILE_FLAGS + "-Wall 
-Wno-unused-parameter")
+target_link_libraries(mytest hs)
+
+install(FILES test1.c
+        DESTINATION ${CMAKE_INSTALL_DOCDIR}/mytest)
\ No newline at end of file
diff --git a/mytest/test1.c b/mytest/test1.c
new file mode 100644
index 000000000..1b1f00010
--- /dev/null
+++ b/mytest/test1.c
@@ -0,0 +1,143 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <hs.h>
+
+/* Number of test cases that reported a compile, scratch or scan failure. */
+static int failures = 0;
+
+/**
+ * Match callback: prints the id and end offset of every reported match.
+ * Returning 0 asks Hyperscan to keep scanning.
+ */
+static int eventHandler(unsigned int id, unsigned long long from,
+                        unsigned long long to, unsigned int flags, void *ctx) {
+    printf("Match for pattern id = %u, at offset %llu\n", id, to);
+    return 0;
+}
+
+void test_depth(void);
+void test_offset(void);
+void test_logical_combiantaion(void);
+void test_logical_combination_relative_position(void);
+
+/**
+ * Compile count patterns in block mode, scan corpus once with eventHandler
+ * and release the scratch space and database again on every path.
+ *
+ * exts may be NULL when no pattern carries extended parameters. A failure is
+ * reported on stderr and counted in failures.
+ */
+static void runScan(const char *corpus, const char *const *patterns,
+                    const unsigned int *flags, const unsigned int *ids,
+                    const hs_expr_ext_t *const *exts, unsigned int count) {
+    hs_database_t *database = NULL;
+    hs_compile_error_t *compile_err = NULL;
+    if (hs_compile_ext_multi(patterns, flags, ids, exts, count, HS_MODE_BLOCK,
+                             NULL, &database, &compile_err) != HS_SUCCESS) {
+        fprintf(stderr, "ERROR: Unable to compile patterns: %s\n",
+                compile_err->message);
+        hs_free_compile_error(compile_err);
+        failures++;
+        return;
+    }
+
+    hs_scratch_t *scratch = NULL;
+    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
+        fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n");
+        failures++;
+    } else {
+        if (hs_scan(database, corpus, (unsigned int)strlen(corpus), 0, scratch,
+                    eventHandler, NULL) != HS_SUCCESS) {
+            fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n");
+            failures++;
+        }
+        hs_free_scratch(scratch);
+    }
+    hs_free_database(database);
+}
+
+/* Scan with pattern "aaa" limited by HS_EXT_FLAG_MAX_DEPTH (max_depth = 10). */
+void test_depth(void) {
+    const char *corpus = "aaaxxaaaxxxxxxxxaaa";
+    const char *patterns[1] = {"aaa"};
+    unsigned int ids[1] = {1};
+    unsigned int flags[1] = {0};
+    hs_expr_ext_t e = {0}; /* no stale stack data in the unused fields */
+    e.flags = HS_EXT_FLAG_MAX_DEPTH;
+    e.max_depth = 10;
+    const hs_expr_ext_t *exts[1] = {&e};
+
+    runScan(corpus, patterns, flags, ids, exts, 1);
+}
+
+/* Scan with pattern "aaa" limited to the offset window [5, 10]. */
+void test_offset(void) {
+    const char *corpus = "aaaxxaaaxxxxxxxxaaa";
+    const char *patterns[1] = {"aaa"};
+    unsigned int ids[1] = {1};
+    unsigned int flags[1] = {0};
+    hs_expr_ext_t e = {0};
+    e.flags = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET;
+    e.min_offset = 5;
+    e.max_offset = 10;
+    const hs_expr_ext_t *exts[1] = {&e};
+
+    runScan(corpus, patterns, flags, ids, exts, 1);
+}
+
+/* Scan with three quiet sub-patterns and four logical combinations of them. */
+void test_logical_combiantaion(void) {
+    enum { PATTERN_COUNT = 7 };
+    const char *corpus = "aaaxxxbbbxxxxxxxxaaa";
+    const char *patterns[PATTERN_COUNT] = {"aaa",   "bbb",    "ccc",   "1 & 2",
+                                           "1 | 2", "1 & !2", "1 & !3"};
+    unsigned int ids[PATTERN_COUNT] = {1, 2, 3, 4, 5, 6, 7};
+    unsigned int flags[PATTERN_COUNT] = {
+        HS_FLAG_QUIET,       HS_FLAG_QUIET,       HS_FLAG_QUIET,
+        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
+        HS_FLAG_COMBINATION};
+
+    runScan(corpus, patterns, flags, ids, NULL, PATTERN_COUNT);
+}
+
+/*
+ * Logical combination "1 & 2" with a relative-position constraint between
+ * its sub-patterns: front id 1, back id 2, distance 10.
+ */
+void test_logical_combination_relative_position(void) {
+    enum { PATTERN_COUNT = 3 };
+    const char *corpus = "aaaxxxbbbxxxxxxxxbbbxx";
+    const char *patterns[PATTERN_COUNT] = {"aaa", "bbb", "1 & 2"};
+    unsigned int ids[PATTERN_COUNT] = {1, 2, 3};
+    unsigned int flags[PATTERN_COUNT] = {HS_FLAG_QUIET, HS_FLAG_QUIET,
+                                         HS_FLAG_COMBINATION};
+
+    hs_combination_subid_priority_t p = {0};
+    p.frontID = 1;
+    p.backID = 2;
+    p.distance = 10;
+    hs_combination_subid_priority_t *priorities[1] = {&p};
+
+    hs_expr_ext_t e = {0};
+    e.flags = HS_EXT_FLAG_COMBINATION_PRIORITY;
+    e.combinationPriorityCount = 1;
+    e.combinationPriority = priorities;
+
+    /* Only the combination expression carries extended parameters. */
+    const hs_expr_ext_t *exts[PATTERN_COUNT] = {NULL, NULL, &e};
+
+    runScan(corpus, patterns, flags, ids, exts, PATTERN_COUNT);
+}
+
+int main(int argc, char *argv[]) {
+    test_depth();
+    test_offset();
+    test_logical_combiantaion();
+    test_logical_combination_relative_position();
+    return failures ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp
index 35f46b3fe..df24604f9 100644
--- a/src/compiler/compiler.cpp
+++ b/src/compiler/compiler.cpp
@@ -82,7 +82,9 @@ void validateExt(const hs_expr_ext &ext) {
                                                     HS_EXT_FLAG_MAX_OFFSET |
                                                     HS_EXT_FLAG_MIN_LENGTH |
                                                     HS_EXT_FLAG_EDIT_DISTANCE |
-                                                    HS_EXT_FLAG_HAMMING_DISTANCE;
+                                                    HS_EXT_FLAG_HAMMING_DISTANCE |
+                                                    HS_EXT_FLAG_MAX_DEPTH |
+                                                    HS_EXT_FLAG_COMBINATION_PRIORITY;
     if (ext.flags & ~ALL_EXT_FLAGS) {
         throw CompileError("Invalid hs_expr_ext flag set.");
     }
@@ -160,6 +162,10 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
         throw CompileError("HS_FLAG_QUIET is not supported in "
                            "combination with HS_FLAG_SOM_LEFTMOST.");
     }
+    if (ext && (ext->flags & HS_EXT_FLAG_COMBINATION_PRIORITY)) {
+        throw CompileError("HS_EXT_FLAG_COMBINATION_PRIORITY is just supported in "
+                           "combination with HS_FLAG_COMBINATION.");
+    }
     flags &= ~HS_FLAG_QUIET;
     ParseMode mode(flags);
 
@@ -219,6 +225,12 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
         if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
             expr.hamm_distance = ext->hamming_distance;
         }
+        if (ext->flags & HS_EXT_FLAG_MAX_DEPTH) {
+            if (!(ext->flags & HS_EXT_FLAG_MAX_OFFSET) ||
+                ext->max_depth < expr.max_offset) {
+                expr.max_offset = ext->max_depth;
+            }
+        }
     }
 
     // These are validated in validateExt, so an error will already have been
@@ -301,9 +313,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
             }
             if (ext) {
                 validateExt(*ext);
-                if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET |
-                                   HS_EXT_FLAG_MAX_OFFSET)) {
-                    throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and "
+                if (ext->flags &
+                    ~(HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET |
+                      HS_EXT_FLAG_MAX_DEPTH |
+                      HS_EXT_FLAG_COMBINATION_PRIORITY)) {
+                    throw
CompileError("only HS_EXT_FLAG_MIN_OFFSET ," + "HS_EXT_FLAG_MAX_DEPTH," + "HS_EXT_FLAG_COMBINATION_PRIORITY and " "HS_EXT_FLAG_MAX_OFFSET extra flags " "are supported in combination " "with HS_FLAG_COMBINATION."); @@ -314,9 +330,18 @@ void addExpression(NG &ng, unsigned index, const char *expression, if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { max_offset = ext->max_offset; } + if (ext->flags & HS_EXT_FLAG_MAX_DEPTH) { + if (!(ext->flags & HS_EXT_FLAG_MAX_OFFSET) || + ext->max_depth < max_offset) { + max_offset = ext->max_depth; + } + } } ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset, max_offset); + if (ext && (ext->flags & HS_EXT_FLAG_COMBINATION_PRIORITY)) { + ng.rm.pl.addPriority(id, ext); + } DEBUG_PRINTF("parsed logical combination expression %u\n", id); } return; diff --git a/src/grey.cpp b/src/grey.cpp index 86a93d25a..e0c0037c2 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -143,7 +143,8 @@ Grey::Grey(void) : smallWriteMergeBatchSize(20), allowTamarama(true), // Tamarama engine tamaChunkSize(100), - dumpFlags(0), + dumpFlags(0xf), + dumpPath("../log/"), limitPatternCount(8000000), // 8M patterns limitPatternLength(16000), // 16K bytes limitGraphVertices(500000), // 500K vertices diff --git a/src/grey.h b/src/grey.h index ed2f845a4..ea00f8291 100644 --- a/src/grey.h +++ b/src/grey.h @@ -36,7 +36,7 @@ namespace ue2 { -struct Grey { +struct Grey {//责控制编译和运行时的各种优化、资源限制、以及匹配引擎的使用。 Grey(void); bool optimiseComponentTree; diff --git a/src/hs.cpp b/src/hs.cpp index ae9cdf146..943a6cd86 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -152,7 +152,7 @@ unsigned getSomPrecision(unsigned mode) { return 8; } - if (mode & HS_MODE_SOM_HORIZON_LARGE) { + if (mode & HS_MODE_SOM_HORIZON_LARGE) {//大中小跨度 return 8; } else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) { return 4; @@ -227,10 +227,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, unsigned somPrecision = getSomPrecision(mode); target_t target_info = platform ? 
target_t(*platform) - : get_current_target(); + : get_current_target();//com 获取一些硬件环境信息 try { - CompileContext cc(isStreaming, isVectored, target_info, g); + CompileContext cc(isStreaming, isVectored, target_info, g);//初始化上下文 NG ng(cc, elements, somPrecision); for (unsigned int i = 0; i < elements; i++) { diff --git a/src/hs_compile.h b/src/hs_compile.h index 5aa241886..11b0cec66 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -45,7 +45,11 @@ extern "C" { #endif - +typedef struct hs_combination_subid_priority{ + unsigned int frontID; + unsigned int backID; + unsigned int distance; +} hs_combination_subid_priority_t; /** * A type containing error details that is returned by the compile calls (@ref * hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on @@ -265,6 +269,9 @@ typedef struct hs_expr_ext { * hs_expr_ext::flags field. */ unsigned hamming_distance; + unsigned max_depth; + unsigned combinationPriorityCount; + hs_combination_subid_priority_t **combinationPriority; } hs_expr_ext_t; /** @@ -291,6 +298,11 @@ typedef struct hs_expr_ext { /** Flag indicating that the hs_expr_ext::hamming_distance field is used. */ #define HS_EXT_FLAG_HAMMING_DISTANCE 16ULL +/** Flag indicating that the hs_expr_ext::max_depth field is used. 
*/ +#define HS_EXT_FLAG_MAX_DEPTH 32ULL + +#define HS_EXT_FLAG_COMBINATION_PRIORITY 64ULL + /** @} */ /** diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index d4f6902a2..72eb4e2a2 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -129,7 +129,7 @@ hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, match: pos -= cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); + DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);//com 确认命中 hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; @@ -357,7 +357,7 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, static really_inline hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, size_t start, char single, bool noCase, - const struct cb_info *cbi) { + const struct cb_info *cbi) {//com 扫描入口 if (len - start < n->msk_len) { // can't find string of length keyLen in a shorter buffer return HWLM_SUCCESS; diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 71f71e327..ee5e18b60 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -424,10 +424,10 @@ u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, u8 cprime = m->remap[*c]; DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, ourisprint(*c) ? 
*c : '?', cprime); - s = succ_table[(s << as) + cprime]; + s = succ_table[(s << as) + cprime];//com 猜想s是状态 DEBUG_PRINTF("s: %u\n", s); - c++; + c++;//逐个字符读取 if (do_accel) { if (s >= accel_limit) { break; @@ -540,7 +540,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, return MO_MATCHES_PENDING; } - u64a loc = (c - 1) - buf + offAdj + 1; + u64a loc = (c - 1) - buf + offAdj + 1;//com loc是匹配结束的位置 if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 482fdb1bc..643cb47f3 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -87,7 +87,7 @@ struct mcclellan { u32 sherman_end; /**< offset of the end of the state_info structures * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ - u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state *///com 最小的可接受状态,超过这个状态的都是可接受状态 u16 sherman_limit; /**< lowest sherman state */ u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index a5a9077d4..3244b335c 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -59,7 +59,10 @@ struct ue2_literal; class ExpressionInfo; class RoseBuild; class SmallWriteBuild; - +/* +管理正则表达式的图结构表示(如 NFA 或 DFA)。 +将正则表达式编译为可用于高效匹配的数据结构。 +管理与 SOM 和图优化相关的任务。*/ class NG : noncopyable { public: NG(const CompileContext &in_cc, size_t num_patterns, diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index 96c3bd89d..c918eae40 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -42,7 +42,7 @@ using namespace std; namespace ue2 { u32 ParsedLogical::getLogicalKey(u32 a) { - auto it = toLogicalKeyMap.find(a); + auto it = 
toLogicalKeyMap.find(a);//一个subid可能被多个comid使用,所以后面的comid寻找的时候,subid可能已经被前面的comid插入了 if (it == toLogicalKeyMap.end()) { // get size before assigning to avoid wacky LHS shenanigans u32 size = toLogicalKeyMap.size(); @@ -88,7 +88,7 @@ do { \ u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) { LogicalOp lop; assert((LOGICAL_OP_BIT & (u32)logicalTree.size()) == 0); - lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size(); + lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size();//com 产生一个新的id,并且返回 lop.op = op; lop.lo = left; lop.ro = right; @@ -107,6 +107,8 @@ void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey, ci.result = lkey_result; ci.min_offset = min_offset; ci.max_offset = max_offset; + ci.combinationPriority = NULL; + ci.combinationPriorityCount = 0; combInfoMap.push_back(ci); DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, ci.id, @@ -157,7 +159,7 @@ void ParsedLogical::validateSubIDs(const unsigned *ids, } } } - +//com 对combInfoMap中的start和result重新编号 void ParsedLogical::logicalKeyRenumber() { // renumber operation lkey in op vector for (auto &op : logicalTree) { @@ -252,14 +254,14 @@ void popOperator(vector &op_stack, vector &subid_stack, left = subid_stack.back(); subid_stack.pop_back(); } - subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right)); + subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right));//com 将left和right生成一个小tree,并返回treeid,并push到subid_stack中,等待和下一个subid组合再生成小tree op_stack.pop_back(); } void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, u32 ekey, u64a min_offset, u64a max_offset) { - u32 ckey = getCombKey(id); + u32 ckey = getCombKey(id);//com 插入comid之前,已有多少个comid被插入 vector op_stack; vector subid_stack; u32 lkey_start = INVALID_LKEY; // logical operation's lkey @@ -296,7 +298,7 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, && cmpOperator(op_stack.back(), op)) { popOperator(op_stack, subid_stack, *this); if 
(lkey_start == INVALID_LKEY) {
-                        lkey_start = subid_stack.back();
+                        lkey_start = subid_stack.back(); // first tree id produced: start of this combination
                     }
                 }
                 op_stack.push_back(op);
@@ -326,12 +328,92 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
         error.locate(i);
         throw;
     }
-    u32 lkey_result = subid_stack.back(); // logical operation's lkey
+    u32 lkey_result = subid_stack.back(); // logical operation's lkey (last tree id produced)
     if (lkey_start == INVALID_LKEY) {
         throw CompileError("No logical operation.");
     }
     combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
                        min_offset, max_offset);
 }
+
+/**
+ * \brief Attach the front/back ordering constraints carried by \a ext to the
+ * combination expression \a id.
+ *
+ * Must run after parseLogicalCombination() has registered \a id. Each
+ * user-supplied sub-expression id is translated to its logical key and the
+ * translated constraints are stored in the combination's CombInfo.
+ *
+ * \throw CompileError if \a id is not a known combination, the constraint
+ * list is missing, a constraint names a sub-expression id that no
+ * combination parsed so far refers to, or memory cannot be allocated.
+ */
+void ParsedLogical::addPriority(u32 id, const hs_expr_ext *ext) {
+    assert(ext);
+    assert(ext->flags & HS_EXT_FLAG_COMBINATION_PRIORITY);
+
+    auto it = toCombKeyMap.find(id);
+    if (it == toCombKeyMap.end()) {
+        throw CompileError("Unknown combination id.");
+    }
+    const u32 ckey = it->second;
+    assert(ckey < combInfoMap.size());
+
+    const u32 count = ext->combinationPriorityCount;
+    if (count && !ext->combinationPriority) {
+        throw CompileError("Combination priority list is missing.");
+    }
+
+    // Translate and validate every entry before allocating anything, so bad
+    // input can neither leak memory nor dereference map.end().
+    vector<hs_combination_subid_priority_t> translated;
+    translated.reserve(count);
+    for (u32 i = 0; i < count; i++) {
+        const hs_combination_subid_priority_t *in =
+            ext->combinationPriority[i];
+        if (!in) {
+            throw CompileError("Combination priority entry is missing.");
+        }
+        auto front = toLogicalKeyMap.find(in->frontID);
+        auto back = toLogicalKeyMap.find(in->backID);
+        if (front == toLogicalKeyMap.end() || back == toLogicalKeyMap.end()) {
+            throw CompileError("Combination priority refers to an unknown "
+                               "sub-expression id.");
+        }
+        hs_combination_subid_priority_t out;
+        out.frontID = front->second;
+        out.backID = back->second;
+        out.distance = in->distance;
+        translated.push_back(out);
+    }
+
+    CombInfo &ci = combInfoMap[ckey];
+    ci.combinationPriorityCount = 0;
+    ci.combinationPriority = NULL;
+    if (!count) {
+        return;
+    }
+
+    // The table is an array of pointers, so size it by the pointer type.
+    hs_combination_subid_priority_t **table =
+        (hs_combination_subid_priority_t **)malloc(sizeof(*table) * count);
+    if (!table) {
+        throw CompileError("Unable to allocate memory.");
+    }
+    for (u32 i = 0; i < count; i++) {
+        table[i] =
+            (hs_combination_subid_priority_t *)malloc(sizeof(*table[i]));
+        if (!table[i]) {
+            while (i--) {
+                free(table[i]);
+            }
+            free(table);
+            throw CompileError("Unable to allocate memory.");
+        }
+        *table[i] = translated[i];
+    }
+    ci.combinationPriority = table;
+    ci.combinationPriorityCount = count;
+}
 
 } // namespace ue2
diff --git a/src/parser/logical_combination.h b/src/parser/logical_combination.h
index 7c8eb36ef..7b91fca4d 100644
--- a/src/parser/logical_combination.h
+++ b/src/parser/logical_combination.h
@@ -87,7 +87,11 @@ class ParsedLogical {
         assert(ckey < combInfoMap.size());
         return combInfoMap.at(ckey);
     }
 
+    /** \brief Attach the combination priority constraints in \a ext to
+     * combination expression \a id. */
+    void addPriority(u32 id, const hs_expr_ext_t *ext);
+
 private:
     /** \brief
Mapping from ckey to combination info. */
     std::vector<CombInfo> combInfoMap;
diff --git a/src/report.h b/src/report.h
index b35f4c052..ec14b5ad6 100644
--- a/src/report.h
+++ b/src/report.h
@@ -179,6 +179,25 @@ void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey,
         break;
     }
 }
+
+/**
+ * \brief Record a hit of logical key \a lkey at offset \a end in the hit log.
+ *
+ * \c first keeps the first non-zero offset seen since the entry was last
+ * zeroed; \c last always holds the most recent offset.
+ *
+ * NOTE(review): 0 doubles as the "no hit yet" marker, so a hit reported at
+ * offset 0 is not retained in \c first.
+ */
+static really_inline
+void setLogicalOffset(struct hs_scratch *scratch, u32 lkey, u64a end) {
+    struct hitOffset *hit = scratch->core_info.hit_log[lkey];
+    DEBUG_PRINTF("set hitOffset logical key %u,offset = %llu\n", lkey, end);
+    if (hit->first == 0) {
+        hit->first = end;
+    }
+    hit->last = end;
+}
 
 /** \brief Mark key \a ckey on in the combination vector. */
 static really_inline
@@ -188,17 +207,52 @@ void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) {
     assert(ckey < rose->ckeyCount);
     mmbit_set((u8 *)cvec, rose->ckeyCount, ckey);
 }
 
+/**
+ * \brief Returns 1 if every front/back constraint attached to combination
+ * \a ci holds for the offsets recorded in the hit log, 0 otherwise.
+ *
+ * A constraint holds when the last recorded offset of the back key is at
+ * least \c distance past the first recorded offset of the front key.
+ */
+static really_inline
+int checkCombinationPriority(const struct CombInfo *ci,
+                             const struct core_info *core_info) {
+    for (u32 i = 0; i < ci->combinationPriorityCount; i++) {
+        const u32 frontID = ci->combinationPriority[i]->frontID;
+        const u32 backID = ci->combinationPriority[i]->backID;
+        const u32 distance = ci->combinationPriority[i]->distance;
+        const u64a front_first = core_info->hit_log[frontID]->first;
+        const u64a back_last = core_info->hit_log[backID]->last;
+
+        /* Compare before subtracting: the offsets are unsigned, so a back
+         * offset below the front offset would wrap to a huge value and
+         * wrongly satisfy the distance check. */
+        if (back_last < front_first || back_last - front_first < distance) {
+            DEBUG_PRINTF("combination priority failed,front lkey = %u "
+                         "(first %llu),back lkey = %u (last %llu),"
+                         "expected distance of at least %u\n",
+                         frontID, front_first, backID, back_last, distance);
+            return 0;
+        }
+        DEBUG_PRINTF("combination priority pass,front lkey = %u,back "
+                     "lkey = %u,expected at least %u,actual distance is "
+                     "%llu\n",
+                     frontID, backID, distance, back_last - front_first);
+    }
+    return 1;
+}
+
 /** \brief Returns 1 if compliant to all logical combinations.
*/ static really_inline char isLogicalCombination(const struct RoseEngine *rose, char *lvec, - u32 start, u32 result) { + u32 start, u32 result) {//com 可能是突破点,对组合逻辑进行校验 const struct LogicalOp *logicalTree = (const struct LogicalOp *) ((const char *)rose + rose->logicalTreeOffset); assert(start >= rose->lkeyCount); assert(start <= result); assert(result < rose->lkeyCount + rose->lopCount); - for (u32 i = start; i <= result; i++) { + for (u32 i = start; i <= result; i++) {//com 猜想是遍历逻辑树中所有节点 const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount); assert(i == op->id); assert(op->op <= LAST_LOGICAL_OP); @@ -283,6 +320,16 @@ void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) { mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount); mmbit_clear((u8 *)cvec, rose->ckeyCount); } +static really_inline +void clearHitLog(const struct hs_scratch *scratch) { + + DEBUG_PRINTF("clearing hitlog size = %u\n", + scratch->logicalKeyCount); + for (u32 i = 0; i < scratch->logicalKeyCount; i++) { + scratch->core_info.hit_log[i]->first = 0; + scratch->core_info.hit_log[i]->last = 0; + } +} /** \brief Clear all keys in the combination vector. 
*/ static really_inline @@ -299,7 +346,7 @@ void clearCvec(const struct RoseEngine *rose, char *cvec) { */ static really_inline int roseDeliverReport(u64a offset, ReportID onmatch, s32 offset_adjust, - struct hs_scratch *scratch, u32 ekey) { + struct hs_scratch *scratch, u32 ekey) {//com 直接调用eventhandler assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); @@ -325,14 +372,14 @@ int roseDeliverReport(u64a offset, ReportID onmatch, s32 offset_adjust, int halt = ci->userCallback(onmatch, from_offset, to_offset, flags, ci->userContext); - if (halt) { + if (halt) {//com 检查回调函数的返回值,判断是否应该退出匹配 DEBUG_PRINTF("callback requested to terminate matches\n"); ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } if (ekey != INVALID_EKEY) { - markAsMatched(ci->rose, ci->exhaustionVector, ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ekey);//com 标记ekey,后续再次命中时候检查ekey,如果已经被标记则不触发回调 return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/block.c b/src/rose/block.c index b3f424cb7..b69c798c7 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -43,7 +43,7 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, struct hs_scratch *scratch) { const u8 *buffer = scratch->core_info.buf; size_t length = scratch->core_info.len; - size_t alen = MIN(length, t->anchoredDistance); + size_t alen = MIN(length, t->anchoredDistance);//alen是什么? 
const struct anchored_matcher_info *curr = atable; DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length); diff --git a/src/rose/match.c b/src/rose/match.c index 84d3b1fdc..01f206e3d 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -181,7 +181,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {//com 匹配成功 struct hs_scratch *scratch = ctx; assert(scratch && scratch->magic == SCRATCH_MAGIC); struct RoseContext *tctxt = &scratch->tctxt; @@ -617,7 +617,7 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { const struct RoseEngine *rose = scratch->core_info.rose; // Our match ID is the program offset. - const u32 program = id; + const u32 program = id;//com 决定了调用roserunprogram的初始状态 const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv; if (rose->pureLiteral) { diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 579ce2783..94a24a605 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2025,13 +2025,13 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, static rose_inline hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, - struct hs_scratch *scratch) { + struct hs_scratch *scratch) {//com 这里也有min或者max offset的校验,猜想是命中了subid,判断是否激活combinationID u8 *cvec = (u8 *)scratch->core_info.combVector; - if (!mmbit_any(cvec, t->ckeyCount)) { + if (!mmbit_any(cvec, t->ckeyCount)) {//如果没有设置任何cvec,直接返回 return HWLM_CONTINUE_MATCHING; } u64a end = scratch->tctxt.lastCombMatchOffset; - for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); + for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);//com 遍历所有的待激活的combinationID i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { const struct CombInfo *combInfoMap = (const struct CombInfo *) ((const char *)t + 
t->combInfoMapOffset); @@ -2062,8 +2062,12 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, DEBUG_PRINTF("Logical Combination Failed!\n"); continue; } - - DEBUG_PRINTF("Logical Combination Passed!\n"); + if (!checkCombinationPriority(ci,&(scratch->core_info))){ + DEBUG_PRINTF("Combination Priority Failed!\n"); + continue; + } + DEBUG_PRINTF("Logical Combination Passed!\n");// com 重点突破口 + //检查ci中的priority是否满足 if (roseReportComb(t, scratch, end, ci->id, 0, ci->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -2150,7 +2154,7 @@ hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t, hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags) { + u64a som, u64a end, u8 prog_flags) {//com 直接回调eventhandler,做很多校验,比如是否满足minoffset,感觉可以在这里处理组合逻辑的前后顺序问题 DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); @@ -2278,7 +2282,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); assert(ri->done_jump); // must progress - pc += ri->done_jump; + pc += ri->done_jump;//com pc这么直接相加,是地址,而不是pc的值相加 PROGRAM_NEXT_INSTRUCTION_JUMP } } @@ -2690,7 +2694,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, enum DedupeResult rv = dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); + is_external_report, ri->quash_som, do_som);//com 猜测:检查去重结果 switch (rv) { case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; @@ -3025,6 +3029,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(ri->lkey < t->lkeyCount); char *lvec = scratch->core_info.logicalVector; setLogicalVal(t, lvec, ri->lkey, 1); + setLogicalOffset(scratch, ri->lkey, end + ri->offset_adjust); updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); } PROGRAM_NEXT_INSTRUCTION @@ -3034,7 +3039,7 @@ hwlmcb_rv_t 
roseRunProgram(const struct RoseEngine *t, assert(ri->ckey != INVALID_CKEY); assert(ri->ckey < t->ckeyCount); char *cvec = scratch->core_info.combVector; - setCombinationActive(t, cvec, ri->ckey); + setCombinationActive(t, cvec, ri->ckey);// 把有可能触发激活的comid全部设置1 } PROGRAM_NEXT_INSTRUCTION @@ -3527,4 +3532,4 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION -#undef PROGRAM_NEXT_INSTRUCTION_JUMP +#undef PROGRAM_NEXT_INSTRUCTION_JUMP \ No newline at end of file diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index df464c280..c6447cf8e 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -383,7 +383,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, u32 longLitStreamStateRequired, u32 historyRequired, - RoseStateOffsets *so) { + RoseStateOffsets *so) {//com 计算scratch需要的分配的bstate大小 u32 curr_offset = 0; // First, runtime status (stores per-stream state, like whether we need a @@ -3605,6 +3605,13 @@ map makeLeftQueueMap(const RoseGraph &g, return lqm; } +static void allocHitLog(RoseEngine &proto){ + proto.hit_log = (struct hitOffset **)malloc(sizeof(struct hitOffset *) * proto.lkeyCount); + for (u32 i = 0; i < proto.lkeyCount; i++) { + proto.hit_log[i] = (struct hitOffset *)malloc(sizeof(struct hitOffset)); + memset(proto.hit_log[i], 0, sizeof(struct hitOffset) ); + } +} bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // We keep all our offsets, counts etc. in a prototype RoseEngine which we @@ -3868,7 +3875,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.ematcherRegionSize = ematcher_region_size; proto.size = currOffset; - + allocHitLog(proto); // Time to allocate the real RoseEngine structure, at cacheline alignment. 
auto engine = make_zeroed_bytecode_ptr(currOffset, 64); assert(engine); // will have thrown bad_alloc otherwise. @@ -3893,4 +3900,5 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { return engine; } + } // namespace ue2 diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 1cf3bbe69..e02056b79 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1679,7 +1679,7 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } #endif // NDEBUG -bytecode_ptr RoseBuildImpl::buildRose(u32 minWidth) { +bytecode_ptr RoseBuildImpl::buildRose(u32 minWidth) {//com 编译表达式过程中构造rose的关键函数 dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 96c95dbf0..2ca19be98 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -523,7 +523,7 @@ void addLogicalSetRequired(const Report &report, ReportManager &rm, static void makeReport(const RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { + const bool has_som, RoseProgram &program) {//com 猜想是构造状态机的过程 assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 7d781f319..a9b35a6c4 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -52,7 +52,7 @@ struct RoseResources; */ class RoseProgram { private: - std::vector> prog; + std::vector> prog;//com 猜想是存储状态机的vector public: RoseProgram(); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 7bd6779c3..75f22a64c 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -210,6 +210,8 @@ struct RoseStateOffsets { /** size in bytes of logical multibit */ u32 logicalVec_size; + u32 hitLog; + u32 hitLogSize; /** Combination multibit. * * entry per combination key (used by Logical Combination). 
*/ @@ -480,6 +482,7 @@ struct RoseEngine { u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; + struct hitOffset **hit_log; }; struct ALIGN_CL_DIRECTIVE anchored_matcher_info { @@ -554,6 +557,10 @@ struct RoseLongLitHashEntry { u32 str_len; }; +struct hitOffset{ + u64a first; + u64a last; +}; static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/src/runtime.c b/src/runtime.c index a055e5f4f..6c0139577 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -135,7 +135,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->core_info.hbuf = history; s->core_info.hlen = hlen; s->core_info.buf_offset = offset; - + s->core_info.hit_log = rose->hit_log; /* and some stuff not actually in core info */ s->som_set_now_offset = ~0ULL; s->deduper.current_report_offset = ~0ULL; @@ -349,7 +349,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_SUCCESS; } - prefetch_data(data, length); + prefetch_data(data, length);//com 好像啥也没做,宏定义是空的 /* populate core info in scratch */ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, @@ -364,6 +364,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, scratch->tctxt.lastCombMatchOffset = 0; clearLvec(rose, scratch->core_info.logicalVector, scratch->core_info.combVector); + clearHitLog(scratch); } if (!length) { @@ -404,14 +405,14 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, // Apply the small write engine if and only if the block (buffer) is // small enough. Otherwise, we allow rose &co to deal with it. 
- if (length < smwr->largestBuffer) { + if (length < smwr->largestBuffer) {//com buffer长度小于35进这里 DEBUG_PRINTF("Attempting small write of block %u bytes long.\n", length); runSmallWriteEngine(smwr, scratch); goto done_scan; } } - +//开始匹配 switch (rose->runtimeImpl) { default: assert(0); diff --git a/src/scratch.c b/src/scratch.c index 9f6d77cdc..020065839 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -86,7 +86,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); u32 som_now_size = proto->som_fatbit_size; u32 som_attempted_size = proto->som_fatbit_size; - + struct hs_scratch *s; struct hs_scratch *s_tmp; size_t queue_size = queueCount * sizeof(struct mq); @@ -98,7 +98,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { anchored_literal_region_len, proto->anchored_literal_fatbit_size); size_t delay_region_size = fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size); - // the size is all the allocated stuff, not including the struct itself size_t size = queue_size + 63 + bStateSize + tStateSize @@ -138,7 +137,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->scratch_alloc = (char *)s_tmp; s->fdr_conf = NULL; - // each of these is at an offset from the previous + // each of these is at an offset from the previous //com 注意,current指向s的后面 char *current = (char *)s + sizeof(*s); // align current so that the following arrays are naturally aligned: this @@ -275,7 +274,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, int resize = 0; hs_scratch_t *proto; - hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256); + hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);//多分配了256,所以下面找64位对齐的时候,可以分配给proto hs_error_t proto_ret = hs_check_alloc(proto_tmp); if (proto_ret != HS_SUCCESS) { hs_scratch_free(proto_tmp); @@ -286,7 +285,7 @@ hs_error_t 
HS_CDECL hs_alloc_scratch(const hs_database_t *db, return proto_ret; } - proto = ROUNDUP_PTR(proto_tmp, 64); + proto = ROUNDUP_PTR(proto_tmp, 64);//com 向后找一个64位对齐的位置 if (*scratch) { *proto = **scratch; diff --git a/src/scratch.h b/src/scratch.h index e3cd92452..a3e48a73d 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -87,6 +87,7 @@ struct catchup_pq { /** \brief Status flag: Unexpected Rose program error. */ #define STATUS_ERROR (1U << 3) + /** \brief Core information about the current scan, used everywhere. */ struct core_info { void *userContext; /**< user-supplied context */ @@ -107,6 +108,7 @@ struct core_info { size_t hlen; /**< length of history buffer in bytes. */ u64a buf_offset; /**< stream offset, for the base of the buffer */ u8 status; /**< stream status bitmask, using STATUS_ flags above */ + struct hitOffset **hit_log; }; /** \brief Rose state information. */ @@ -177,10 +179,11 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 bStateSize; /**< sizeof block mode states */ u32 tStateSize; /**< sizeof transient rose states */ u32 fullStateSize; /**< size of uncompressed nfa state */ + u32 logicalKeyCount; /**< number of logical keys */ struct RoseContext tctxt; char *bstate; /**< block mode states */ char *tstate; /**< state for transient roses */ - char *fullState; /**< uncompressed NFA state */ +char *fullState; /**< uncompressed NFA state */ struct mq *queues; struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid * & active */ diff --git a/src/util/logical.h b/src/util/logical.h index 0c8b6469a..fa1938ea5 100644 --- a/src/util/logical.h +++ b/src/util/logical.h @@ -34,7 +34,7 @@ #define LOGICAL_H #include "ue2common.h" - +#include "hs_compile.h" /** Index meaning a given logical key is invalid. 
*/ #define INVALID_LKEY (~(u32)0) #define INVALID_CKEY INVALID_LKEY @@ -67,6 +67,8 @@ struct CombInfo { u32 result; //!< ckey of logical operation to give final result u64a min_offset; u64a max_offset; + u32 combinationPriorityCount; + hs_combination_subid_priority_t ** combinationPriority; }; /** Temporarily use to seperate operations' id from reports' lkey diff --git a/src/util/multibit.h b/src/util/multibit.h index c3a4ba461..c8d19a233 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -345,8 +345,8 @@ void mmbit_clear(u8 *bits, u32 total_bits) { /** \brief Specialisation of \ref mmbit_set for flat models. */ static really_inline char mmbit_set_flat(u8 *bits, u32 total_bits, u32 key) { - bits += mmbit_flat_select_byte(key, total_bits); - u8 mask = 1U << (key % 8); + bits += mmbit_flat_select_byte(key, total_bits);//com 位图bitmap某一个字节 + u8 mask = 1U << (key % 8);//com 字节内的某一比特位 char was_set = !!(*bits & mask); *bits |= mask; return was_set; @@ -391,9 +391,9 @@ char mmbit_isset(const u8 *bits, u32 total_bits, u32 key); /** \brief Sets the given key in the multibit. Returns 0 if the key was NOT * already set, 1 otherwise. */ static really_inline -char mmbit_set(u8 *bits, u32 total_bits, u32 key) { +char mmbit_set(u8 *bits, u32 total_bits, u32 key) {//com totalbits<256时,不起作用 MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key); - char status = mmbit_set_i(bits, total_bits, key); + char status = mmbit_set_i(bits, total_bits, key);//com 卧槽阿 位图bitmap MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status); assert(mmbit_isset(bits, total_bits, key)); return status; diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 015dc9c85..705989684 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -170,7 +170,7 @@ class ReportManager : noncopyable { std::unordered_map externalIdMap; /** \brief Mapping from expression index to exhaustion key. 
*/ - std::map toExhaustibleKeyMap; + std::map toExhaustibleKeyMap;//com 设置了HS_FLAG_SINGLEMATCH /** \brief Unallocated expression index, used for \ref * getUnassociatedExhaustibleKey. diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 0ea8bce51..61f2193eb 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -178,7 +178,7 @@ TEST_P(ExprInfop, check_ext_null) { free(info); } -static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0, 0 }; +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0, 0 ,0}; static const expected_info ei_test[] = { {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, @@ -221,67 +221,67 @@ static const expected_info ei_test[] = { {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, // Some cases with extended parameters. - {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0, 0}, 6, UINT_MAX, 0, 0, 0}, - - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0,0}, 6, 10, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0,0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0,0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0,0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0, 0,0}, 6, UINT_MAX, 0, 0, 0}, + + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, UINT_MAX, 
0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0,0}, 10, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0,0}, 10, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, 7, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, 7, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2, 0,0}, 8, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"^abcdef", 
{HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2,0}, 10, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, UINT_MAX, 0, 0, 0}, + 
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2,0}, 10, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 6, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 6, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, };