From 0dff0899d8ae649718ec7d30583d98a44e88868d Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Wed, 11 Sep 2024 13:39:41 +0800 Subject: [PATCH 01/16] =?UTF-8?q?=E5=A2=9E=E5=8A=A0test1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 + CMakeLists.txt | 1 + mytest/CMakeLists.txt | 7 ++ mytest/test1.c | 231 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 243 insertions(+) create mode 100644 mytest/CMakeLists.txt create mode 100644 mytest/test1.c diff --git a/.gitignore b/.gitignore index 607453a5d..afb31e7a6 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,7 @@ pcre/pcre_stringpiece.h pcre/pcrecpparg.h pcre/stamp-h1 pcre/test-driver +build/ +docs/ +Doxyfile +data/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 7757916d2..78dbc4905 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1425,4 +1425,5 @@ endif () option(BUILD_EXAMPLES "Build Hyperscan example code (default TRUE)" TRUE) if(NOT WIN32 AND BUILD_EXAMPLES) add_subdirectory(examples) + add_subdirectory(mytest) endif() diff --git a/mytest/CMakeLists.txt b/mytest/CMakeLists.txt new file mode 100644 index 000000000..1f51e0c22 --- /dev/null +++ b/mytest/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(mytest test1.c) +set_source_files_properties(test1.c PROPERTIES COMPILE_FLAGS + "-Wall -Wno-unused-parameter") +target_link_libraries(mytest hs) + +install(FILES test1.c + DESTINATION ${CMAKE_INSTALL_DOCDIR}/mytest) \ No newline at end of file diff --git a/mytest/test1.c b/mytest/test1.c new file mode 100644 index 000000000..30a97b0f0 --- /dev/null +++ b/mytest/test1.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2015-2021, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperscan example program 1: simplegrep + * + * This is a simple example of Hyperscan's most basic functionality: it will + * search a given input file for a pattern supplied as a command-line argument. + * It is intended to demonstrate correct usage of the hs_compile and hs_scan + * functions of Hyperscan. + * + * Patterns are scanned in 'DOTALL' mode, which is equivalent to PCRE's '/s' + * modifier. This behaviour can be changed by modifying the "flags" argument to + * hs_compile. 
+ * + * Build instructions: + * + * gcc -o simplegrep simplegrep.c $(pkg-config --cflags --libs libhs) + * + * Usage: + * + * ./simplegrep + * + * Example: + * + * ./simplegrep int simplegrep.c + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +/** + * This is the function that will be called for each match that occurs. @a ctx + * is to allow you to have some application-specific state that you will get + * access to for each match. In our simple example we're just going to use it + * to pass in the pattern that was being searched for so we can print it out. + */ +static int eventHandler(unsigned int id, unsigned long long from, + unsigned long long to, unsigned int flags, void *ctx) { + printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to); + return 0; +} + +/** + * Fill a data buffer from the given filename, returning it and filling @a + * length with its length. Returns NULL on failure. + */ +static char *readInputData(const char *inputFN, unsigned int *length) { + FILE *f = fopen(inputFN, "rb"); + if (!f) { + fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN, + strerror(errno)); + return NULL; + } + + /* We use fseek/ftell to get our data length, in order to keep this example + * code as portable as possible. */ + if (fseek(f, 0, SEEK_END) != 0) { + fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN, + strerror(errno)); + fclose(f); + return NULL; + } + long dataLen = ftell(f); + if (dataLen < 0) { + fprintf(stderr, "ERROR: ftell() failed: %s\n", strerror(errno)); + fclose(f); + return NULL; + } + if (fseek(f, 0, SEEK_SET) != 0) { + fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN, + strerror(errno)); + fclose(f); + return NULL; + } + + /* Hyperscan's hs_scan function accepts length as an unsigned int, so we + * limit the size of our buffer appropriately. 
*/ + if ((unsigned long)dataLen > UINT_MAX) { + dataLen = UINT_MAX; + printf("WARNING: clipping data to %ld bytes\n", dataLen); + } else if (dataLen == 0) { + fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN); + fclose(f); + return NULL; + } + + char *inputData = malloc(dataLen); + if (!inputData) { + fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen); + fclose(f); + return NULL; + } + + char *p = inputData; + size_t bytesLeft = dataLen; + while (bytesLeft) { + size_t bytesRead = fread(p, 1, bytesLeft, f); + bytesLeft -= bytesRead; + p += bytesRead; + if (ferror(f) != 0) { + fprintf(stderr, "ERROR: fread() failed\n"); + free(inputData); + fclose(f); + return NULL; + } + } + + fclose(f); + + *length = (unsigned int)dataLen; + return inputData; +} + +int main(int argc, char *argv[]) { + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return -1; + } + + char *pattern = argv[1]; + char *inputFN = argv[2]; + + if (access(inputFN, F_OK) != 0) { + fprintf(stderr, "ERROR: file doesn't exist.\n"); + return -1; + } + if (access(inputFN, R_OK) != 0) { + fprintf(stderr, "ERROR: can't be read.\n"); + return -1; + } + + /* First, we attempt to compile the pattern provided on the command line. + * We assume 'DOTALL' semantics, meaning that the '.' meta-character will + * match newline characters. The compiler will analyse the given pattern and + * either return a compiled Hyperscan database, or an error message + * explaining why the pattern didn't compile. + */ + hs_database_t *database; + hs_compile_error_t *compile_err; + if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database, + &compile_err) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to compile pattern \"%s\": %s\n", + pattern, compile_err->message); + hs_free_compile_error(compile_err); + return -1; + } + + /* Next, we read the input data file into a buffer. 
*/ + unsigned int length; + char *inputData = readInputData(inputFN, &length); + if (!inputData) { + hs_free_database(database); + return -1; + } + + /* Finally, we issue a call to hs_scan, which will search the input buffer + * for the pattern represented in the bytecode. Note that in order to do + * this, scratch space needs to be allocated with the hs_alloc_scratch + * function. In typical usage, you would reuse this scratch space for many + * calls to hs_scan, but as we're only doing one, we'll be allocating it + * and deallocating it as soon as our matching is done. + * + * When matches occur, the specified callback function (eventHandler in + * this file) will be called. Note that although it is reminiscent of + * asynchronous APIs, Hyperscan operates synchronously: all matches will be + * found, and all callbacks issued, *before* hs_scan returns. + * + * In this example, we provide the input pattern as the context pointer so + * that the callback is able to print out the pattern that matched on each + * match event. + */ + hs_scratch_t *scratch = NULL; + if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n"); + free(inputData); + hs_free_database(database); + return -1; + } + + printf("Scanning %u bytes with Hyperscan\n", length); + + if (hs_scan(database, inputData, length, 0, scratch, eventHandler, + pattern) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n"); + hs_free_scratch(scratch); + free(inputData); + hs_free_database(database); + return -1; + } + + /* Scanning is complete, any matches have been handled, so now we just + * clean up and exit. 
+ */ + hs_free_scratch(scratch); + free(inputData); + hs_free_database(database); + return 0; +} From 204fde1c60d98501a06ccbd0a627284623ed0355 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sun, 15 Sep 2024 10:05:05 +0800 Subject: [PATCH 02/16] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E6=A3=80=E6=B5=8B=E6=B7=B1=E5=BA=A6=EF=BC=8C=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E4=B8=80=E4=BA=9B=E7=8C=9C=E6=83=B3=E7=9A=84?= =?UTF-8?q?=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 53 ++++++++++-------------- src/compiler/compiler.cpp | 9 +++- src/hs_compile.h | 4 ++ src/hwlm/noodle_engine.c | 4 +- src/nfa/mcclellan.c | 4 +- src/report.h | 6 +-- src/rose/block.c | 2 +- src/rose/match.c | 2 +- src/rose/program_runtime.c | 61 +++------------------------ src/runtime.c | 2 +- src/util/multibit.h | 2 +- unit/hyperscan/expr_info.cpp | 80 ++++++++++++++++++------------------ 12 files changed, 89 insertions(+), 140 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index 30a97b0f0..dea3d2c91 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -61,6 +61,8 @@ #include +#define PATTERN_COUNT 3 + /** * This is the function that will be called for each match that occurs. 
@a ctx * is to allow you to have some application-specific state that you will get @@ -150,7 +152,18 @@ int main(int argc, char *argv[]) { return -1; } - char *pattern = argv[1]; + const char *patterns[PATTERN_COUNT] = {"a", "s", "1|2"}; + unsigned int ids[PATTERN_COUNT] = {1, 2, 3}; + unsigned int flags[PATTERN_COUNT] = {0,0,HS_FLAG_COMBINATION}; + + hs_expr_ext_t e1; + e1.flags = HS_EXT_FLAG_MIN_OFFSET|HS_EXT_FLAG_MAX_DEPTH; + e1.min_offset = 3; + e1.max_depth=10; + + const hs_expr_ext_t *exts[PATTERN_COUNT] = {NULL}; /* one slot per pattern; a NULL slot means "no extended parameters", so every slot must be initialised before hs_compile_ext_multi reads it */ + exts[0] = &e1; + char *inputFN = argv[2]; if (access(inputFN, F_OK) != 0) { @@ -161,19 +174,12 @@ int main(int argc, char *argv[]) { fprintf(stderr, "ERROR: can't be read.\n"); return -1; } - - /* First, we attempt to compile the pattern provided on the command line. - * We assume 'DOTALL' semantics, meaning that the '.' meta-character will - * match newline characters. The compiler will analyse the given pattern and - * either return a compiled Hyperscan database, or an error message - * explaining why the pattern didn't compile. - */ hs_database_t *database; hs_compile_error_t *compile_err; - if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database, - &compile_err) != HS_SUCCESS) { - fprintf(stderr, "ERROR: Unable to compile pattern \"%s\": %s\n", - pattern, compile_err->message); + if (hs_compile_ext_multi(patterns, flags, ids, exts,PATTERN_COUNT, HS_MODE_BLOCK, + NULL, &database, &compile_err) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to compile pattern: %s\n", + compile_err->message); hs_free_compile_error(compile_err); return -1; } @@ -185,23 +191,6 @@ int main(int argc, char *argv[]) { hs_free_database(database); return -1; } - - /* Finally, we issue a call to hs_scan, which will search the input buffer - * for the pattern represented in the bytecode. Note that in order to do - * this, scratch space needs to be allocated with the hs_alloc_scratch - * function. 
In typical usage, you would reuse this scratch space for many - * calls to hs_scan, but as we're only doing one, we'll be allocating it - * and deallocating it as soon as our matching is done. - * - * When matches occur, the specified callback function (eventHandler in - * this file) will be called. Note that although it is reminiscent of - * asynchronous APIs, Hyperscan operates synchronously: all matches will be - * found, and all callbacks issued, *before* hs_scan returns. - * - * In this example, we provide the input pattern as the context pointer so - * that the callback is able to print out the pattern that matched on each - * match event. - */ hs_scratch_t *scratch = NULL; if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n"); @@ -211,9 +200,9 @@ int main(int argc, char *argv[]) { } printf("Scanning %u bytes with Hyperscan\n", length); - - if (hs_scan(database, inputData, length, 0, scratch, eventHandler, - pattern) != HS_SUCCESS) { + // length =10; + if (hs_scan(database, inputData, length, 0, scratch, eventHandler, NULL) != + HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to scan input buffer. 
Exiting.\n"); hs_free_scratch(scratch); free(inputData); diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 35f46b3fe..154c01dba 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -82,7 +82,8 @@ void validateExt(const hs_expr_ext &ext) { HS_EXT_FLAG_MAX_OFFSET | HS_EXT_FLAG_MIN_LENGTH | HS_EXT_FLAG_EDIT_DISTANCE | - HS_EXT_FLAG_HAMMING_DISTANCE; + HS_EXT_FLAG_HAMMING_DISTANCE| + HS_EXT_FLAG_MAX_DEPTH; if (ext.flags & ~ALL_EXT_FLAGS) { throw CompileError("Invalid hs_expr_ext flag set."); } @@ -219,6 +220,12 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) { expr.hamm_distance = ext->hamming_distance; } + if (ext->flags & HS_EXT_FLAG_MAX_DEPTH) { + if (!(ext->flags & HS_EXT_FLAG_MAX_OFFSET) || + ext->max_depth < expr.max_offset) { + expr.max_offset = ext->max_depth; + } + } } // These are validated in validateExt, so an error will already have been diff --git a/src/hs_compile.h b/src/hs_compile.h index 5aa241886..9ab1e7a5b 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -265,6 +265,7 @@ typedef struct hs_expr_ext { * hs_expr_ext::flags field. */ unsigned hamming_distance; + unsigned max_depth; } hs_expr_ext_t; /** @@ -291,6 +292,9 @@ typedef struct hs_expr_ext { /** Flag indicating that the hs_expr_ext::hamming_distance field is used. */ #define HS_EXT_FLAG_HAMMING_DISTANCE 16ULL +/** Flag indicating that the hs_expr_ext::max_depth field is used. 
*/ +#define HS_EXT_FLAG_MAX_DEPTH 32ULL + /** @} */ /** diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index d4f6902a2..72eb4e2a2 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -129,7 +129,7 @@ hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, match: pos -= cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); + DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);/* com: match confirmed */ hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; @@ -357,7 +357,7 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, static really_inline hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, size_t start, char single, bool noCase, - const struct cb_info *cbi) { + const struct cb_info *cbi) {/* com: scan entry point */ if (len - start < n->msk_len) { // can't find string of length keyLen in a shorter buffer return HWLM_SUCCESS; diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 71f71e327..1441e0937 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -424,10 +424,10 @@ u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, u8 cprime = m->remap[*c]; DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, ourisprint(*c) ? *c : '?', cprime); - s = succ_table[(s << as) + cprime]; + s = succ_table[(s << as) + cprime];/* com: guess - s is the current state */ DEBUG_PRINTF("s: %u\n", s); - c++; + c++;/* input is consumed one character at a time */ if (do_accel) { if (s >= accel_limit) { break; diff --git a/src/report.h b/src/report.h index b35f4c052..7678f3542 100644 --- a/src/report.h +++ b/src/report.h @@ -192,13 +192,13 @@ void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) { /** \brief Returns 1 if compliant to all logical combinations. 
*/ static really_inline char isLogicalCombination(const struct RoseEngine *rose, char *lvec, - u32 start, u32 result) { + u32 start, u32 result) {/* com: possibly the key entry point - validates the combination logic */ const struct LogicalOp *logicalTree = (const struct LogicalOp *) ((const char *)rose + rose->logicalTreeOffset); assert(start >= rose->lkeyCount); assert(start <= result); assert(result < rose->lkeyCount + rose->lopCount); - for (u32 i = start; i <= result; i++) { + for (u32 i = start; i <= result; i++) {/* com: guess - walks every node of the logical tree */ const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount); assert(i == op->id); assert(op->op <= LAST_LOGICAL_OP); @@ -299,7 +299,7 @@ void clearCvec(const struct RoseEngine *rose, char *cvec) { */ static really_inline int roseDeliverReport(u64a offset, ReportID onmatch, s32 offset_adjust, - struct hs_scratch *scratch, u32 ekey) { + struct hs_scratch *scratch, u32 ekey) {/* com: calls the event handler directly */ assert(scratch); assert(scratch->magic == SCRATCH_MAGIC); diff --git a/src/rose/block.c b/src/rose/block.c index b3f424cb7..b69c798c7 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -43,7 +43,7 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, struct hs_scratch *scratch) { const u8 *buffer = scratch->core_info.buf; size_t length = scratch->core_info.len; - size_t alen = MIN(length, t->anchoredDistance); + size_t alen = MIN(length, t->anchoredDistance);/* TODO: what is alen? */ 
const struct anchored_matcher_info *curr = atable; DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length); diff --git a/src/rose/match.c b/src/rose/match.c index 84d3b1fdc..65c139ee4 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -181,7 +181,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {/* com: match succeeded */ struct hs_scratch *scratch = ctx; assert(scratch && scratch->magic == SCRATCH_MAGIC); struct RoseContext *tctxt = &scratch->tctxt; diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 579ce2783..2d98a279c 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2025,13 +2025,13 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, static rose_inline hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, - struct hs_scratch *scratch) { + struct hs_scratch *scratch) {/* com: min/max offset checks also happen here; guess - a sub-ID has matched and this decides whether to activate the combination ID */ u8 *cvec = (u8 *)scratch->core_info.combVector; if (!mmbit_any(cvec, t->ckeyCount)) { return HWLM_CONTINUE_MATCHING; } u64a end = scratch->tctxt.lastCombMatchOffset; - for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); + for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);/* com: iterate over every combination ID pending activation */ i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { const struct CombInfo *combInfoMap = (const struct CombInfo *) ((const char *)t + t->combInfoMapOffset); @@ -2063,7 +2063,7 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, continue; } - DEBUG_PRINTF("Logical Combination Passed!\n"); + DEBUG_PRINTF("Logical Combination Passed!\n");/* com: key point to dig into */ if (roseReportComb(t, scratch, end, ci->id, 0, ci->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -2150,7 +2150,7 @@ hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t, 
hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags) { + u64a som, u64a end, u8 prog_flags) {/* com: calls back the event handler directly after many checks (e.g. whether min_offset is satisfied); the ordering constraints of combination logic could probably be handled here */ DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); @@ -3476,55 +3476,4 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, L_PROGRAM_NEXT_INSTRUCTION L_PROGRAM_CASE(FLUSH_COMBINATION) { - assert(end >= tctxt->lastCombMatchOffset); - if (end > tctxt->lastCombMatchOffset) { - if (flushActiveCombinations(t, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - } - L_PROGRAM_NEXT_INSTRUCTION - - L_PROGRAM_CASE(SET_EXHAUST) { - updateSeqPoint(tctxt, end, from_mpv); - if (roseSetExhaust(t, scratch, ri->ekey) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - work_done = 1; - } - L_PROGRAM_NEXT_INSTRUCTION - - L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) { - assert(end >= tctxt->lastCombMatchOffset); - if (flushActiveCombinations(t, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - if (checkPurelyNegatives(t, scratch, end) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - L_PROGRAM_NEXT_INSTRUCTION - - default: { - assert(0); // unreachable - scratch->core_info.status |= STATUS_ERROR; - return HWLM_TERMINATE_MATCHING; - } - } - } - - assert(0); // unreachable - return HWLM_CONTINUE_MATCHING; -} - -#undef L_PROGRAM_CASE -#undef L_PROGRAM_NEXT_INSTRUCTION -#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION -#undef PROGRAM_NEXT_INSTRUCTION_JUMP + assert(end >= tc \ No newline at end of file diff --git a/src/runtime.c b/src/runtime.c index a055e5f4f..98aa587fb 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -411,7 +411,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, goto done_scan; } } - +/* matching starts here */ switch 
(rose->runtimeImpl) { default: assert(0); diff --git a/src/util/multibit.h b/src/util/multibit.h index c3a4ba461..d02fd95b1 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -393,7 +393,7 @@ char mmbit_isset(const u8 *bits, u32 total_bits, u32 key); static really_inline char mmbit_set(u8 *bits, u32 total_bits, u32 key) { MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key); - char status = mmbit_set_i(bits, total_bits, key); + char status = mmbit_set_i(bits, total_bits, key);/* com: note - this is a bitmap */ MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status); assert(mmbit_isset(bits, total_bits, key)); return status; diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 0ea8bce51..61f2193eb 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -178,7 +178,7 @@ TEST_P(ExprInfop, check_ext_null) { free(info); } -static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0, 0 }; +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0, 0 ,0}; static const expected_info ei_test[] = { {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, @@ -221,67 +221,67 @@ static const expected_info ei_test[] = { {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, // Some cases with extended parameters. 
- {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0, 0}, 6, UINT_MAX, 0, 0, 0}, - - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0,0}, 6, 10, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0,0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0,0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0,0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0, 0,0}, 6, UINT_MAX, 0, 0, 0}, + + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0,0}, 10, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0}, + {"^abc.*def", 
{HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0,0}, 10, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, 7, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0,0}, 5, 7, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0,0}, 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2, 0,0}, 8, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0,0}, 4, 8, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0,0}, 4, 6, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, 
UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2,0}, 10, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2,0}, 10, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, UINT_MAX, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, 6, 0, 0, 0}, - {"^abcdef", 
{HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 6, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5,0}, 6, 6, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 6, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2,0}, 6, 6, 0, 0, 0}, - {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2}, + {"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2,0}, 6, 6, 0, 0, 0}, }; From a85d11125af232665bad2c8c8abd09e8168fa0ef Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sun, 15 Sep 2024 14:46:20 +0800 Subject: [PATCH 03/16] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=8A=E6=AC=A1?= =?UTF-8?q?=E6=8F=90program=5Fruntime.c?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/rose/program_runtime.c | 53 +++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 2d98a279c..75308be9b 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -3476,4 +3476,55 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, L_PROGRAM_NEXT_INSTRUCTION L_PROGRAM_CASE(FLUSH_COMBINATION) { - assert(end >= tc \ No newline at end of file + assert(end >= tctxt->lastCombMatchOffset); + if (end > tctxt->lastCombMatchOffset) { + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(SET_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseSetExhaust(t, scratch, ri->ekey) + == 
HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + if (checkPurelyNegatives(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + default: { + assert(0); // unreachable + scratch->core_info.status |= STATUS_ERROR; + return HWLM_TERMINATE_MATCHING; + } + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef L_PROGRAM_CASE +#undef L_PROGRAM_NEXT_INSTRUCTION +#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION +#undef PROGRAM_NEXT_INSTRUCTION_JUMP \ No newline at end of file From d8d72e6f089b35be34215712a262a11d00fe946c Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Mon, 16 Sep 2024 11:42:56 +0800 Subject: [PATCH 04/16] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E6=B3=A8=E9=87=8A=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86?= =?UTF-8?q?gitignore=EF=BC=8C=E5=A2=9E=E5=8A=A0=E6=97=A5=E6=8A=A5=EF=BC=8C?= =?UTF-8?q?=E4=BF=AE=E6=94=B9cmake=E5=8A=A0=E5=BF=AB=E7=BC=96=E8=AF=91?= =?UTF-8?q?=E9=80=9F=E5=BA=A6=EF=BC=8C=E4=BF=AE=E6=94=B9dumflag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +++ CMakeLists.txt | 16 ++++++++-------- mydoc/daylog.md | 5 +++++ src/grey.cpp | 3 ++- src/grey.h | 2 +- src/hs.cpp | 6 +++--- src/nfagraph/ng.h | 5 ++++- 7 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 mydoc/daylog.md diff --git a/.gitignore b/.gitignore index afb31e7a6..6b6743c10 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,6 @@ build/ docs/ Doxyfile data/ +CMakeFiles/ +CMakeCache.txt +cmake_install.cmake \ No newline at end of file diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 78dbc4905..9a9032481 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,7 +477,7 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -add_subdirectory(util) +# add_subdirectory(util) add_subdirectory(doc/dev-reference) if (NOT WIN32) @@ -496,12 +496,12 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS) set(BUILD_CHIMERA TRUE) endif() -add_subdirectory(unit) +# add_subdirectory(unit) if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) - add_subdirectory(tools) + # add_subdirectory(tools) endif() if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) - add_subdirectory(chimera) + # add_subdirectory(chimera) endif() endif() @@ -548,12 +548,12 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS) set(BUILD_CHIMERA TRUE) endif() -add_subdirectory(unit) +# add_subdirectory(unit) if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) - add_subdirectory(tools) + # add_subdirectory(tools) endif() if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) - add_subdirectory(chimera) + # add_subdirectory(chimera) endif() endif() @@ -1424,6 +1424,6 @@ endif () option(BUILD_EXAMPLES "Build Hyperscan example code (default TRUE)" TRUE) if(NOT WIN32 AND BUILD_EXAMPLES) - add_subdirectory(examples) + # add_subdirectory(examples) add_subdirectory(mytest) endif() diff --git a/mydoc/daylog.md b/mydoc/daylog.md new file mode 100644 index 000000000..8dae0b0e1 --- /dev/null +++ b/mydoc/daylog.md @@ -0,0 +1,5 @@ +# hyperscan日报 + +## 20240915 + +lldbug这个插件不要用,进不去compile函数!!C++那两个紫色的扩展就行了 diff --git a/src/grey.cpp b/src/grey.cpp index 86a93d25a..4f4779439 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -143,7 +143,8 @@ Grey::Grey(void) : smallWriteMergeBatchSize(20), allowTamarama(true), // Tamarama engine tamaChunkSize(100), - dumpFlags(0), + dumpFlags(0xf), + dumpPath("./"), limitPatternCount(8000000), // 8M patterns limitPatternLength(16000), // 16K bytes 
limitGraphVertices(500000), // 500K vertices diff --git a/src/grey.h b/src/grey.h index ed2f845a4..ea00f8291 100644 --- a/src/grey.h +++ b/src/grey.h @@ -36,7 +36,7 @@ namespace ue2 { -struct Grey { +struct Grey {//责控制编译和运行时的各种优化、资源限制、以及匹配引擎的使用。 Grey(void); bool optimiseComponentTree; diff --git a/src/hs.cpp b/src/hs.cpp index ae9cdf146..943a6cd86 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -152,7 +152,7 @@ unsigned getSomPrecision(unsigned mode) { return 8; } - if (mode & HS_MODE_SOM_HORIZON_LARGE) { + if (mode & HS_MODE_SOM_HORIZON_LARGE) {//大中小跨度 return 8; } else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) { return 4; @@ -227,10 +227,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, unsigned somPrecision = getSomPrecision(mode); target_t target_info = platform ? target_t(*platform) - : get_current_target(); + : get_current_target();//com 获取一些硬件环境信息 try { - CompileContext cc(isStreaming, isVectored, target_info, g); + CompileContext cc(isStreaming, isVectored, target_info, g);//初始化上下文 NG ng(cc, elements, somPrecision); for (unsigned int i = 0; i < elements; i++) { diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index a5a9077d4..3244b335c 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -59,7 +59,10 @@ struct ue2_literal; class ExpressionInfo; class RoseBuild; class SmallWriteBuild; - +/* +管理正则表达式的图结构表示(如 NFA 或 DFA)。 +将正则表达式编译为可用于高效匹配的数据结构。 +管理与 SOM 和图优化相关的任务。*/ class NG : noncopyable { public: NG(const CompileContext &in_cc, size_t num_patterns, From 0403c0cb52aee87e6936e9faddf1e3ed78b4638d Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Fri, 20 Sep 2024 10:08:26 +0800 Subject: [PATCH 05/16] =?UTF-8?q?=E5=AE=9A=E6=9C=9F=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=B8=80=E4=BA=9B=E6=B3=A8?= =?UTF-8?q?=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 12 ++++++------ src/grey.cpp | 2 +- src/nfa/mcclellan.c | 2 +- 
src/nfa/mcclellan_internal.h | 2 +- src/rose/match.c | 2 +- src/rose/program_runtime.c | 6 +++--- src/runtime.c | 2 +- src/util/multibit.h | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index dea3d2c91..37c2bf9db 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -61,7 +61,7 @@ #include -#define PATTERN_COUNT 3 +#define PATTERN_COUNT 5 /** * This is the function that will be called for each match that occurs. @a ctx @@ -152,14 +152,14 @@ int main(int argc, char *argv[]) { return -1; } - const char *patterns[PATTERN_COUNT] = {"a", "s", "1|2"}; - unsigned int ids[PATTERN_COUNT] = {1, 2, 3}; - unsigned int flags[PATTERN_COUNT] = {0,0,HS_FLAG_COMBINATION}; + const char *patterns[PATTERN_COUNT] = {"aaa", "bbb", "1 & 2","ccc","2 & 4"}; + unsigned int ids[PATTERN_COUNT] = {1, 2, 3,4,5}; + unsigned int flags[PATTERN_COUNT] = {HS_FLAG_SINGLEMATCH,0,HS_FLAG_COMBINATION,0,HS_FLAG_COMBINATION}; hs_expr_ext_t e1; e1.flags = HS_EXT_FLAG_MIN_OFFSET|HS_EXT_FLAG_MAX_DEPTH; - e1.min_offset = 3; - e1.max_depth=10; + e1.min_offset = 0; + e1.max_depth=100; const hs_expr_ext_t **exts= malloc(PATTERN_COUNT * sizeof(hs_expr_ext_t *)); exts[0] = &e1; diff --git a/src/grey.cpp b/src/grey.cpp index 4f4779439..e0c0037c2 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -144,7 +144,7 @@ Grey::Grey(void) : allowTamarama(true), // Tamarama engine tamaChunkSize(100), dumpFlags(0xf), - dumpPath("./"), + dumpPath("../log/"), limitPatternCount(8000000), // 8M patterns limitPatternLength(16000), // 16K bytes limitGraphVertices(500000), // 500K vertices diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 1441e0937..ee5e18b60 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -540,7 +540,7 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, return MO_MATCHES_PENDING; } - u64a loc = (c - 1) - buf + offAdj + 1; + u64a loc = (c - 1) - buf + offAdj + 1;//com loc是匹配结束的位置 if (single) { 
DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 482fdb1bc..643cb47f3 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -87,7 +87,7 @@ struct mcclellan { u32 sherman_end; /**< offset of the end of the state_info structures * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ - u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state *///com 最小的可接受状态,超过这个状态的都是可接受状态 u16 sherman_limit; /**< lowest sherman state */ u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; diff --git a/src/rose/match.c b/src/rose/match.c index 65c139ee4..01f206e3d 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -617,7 +617,7 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { const struct RoseEngine *rose = scratch->core_info.rose; // Our match ID is the program offset. 
- const u32 program = id; + const u32 program = id;//com 决定了调用roserunprogram的初始状态 const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv; if (rose->pureLiteral) { diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 75308be9b..1756464be 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2027,7 +2027,7 @@ static rose_inline hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, struct hs_scratch *scratch) {//com 这里也有min或者max offset的校验,猜想是命中了subid,判断是否激活combinationID u8 *cvec = (u8 *)scratch->core_info.combVector; - if (!mmbit_any(cvec, t->ckeyCount)) { + if (!mmbit_any(cvec, t->ckeyCount)) {//如果没有编译任何com,则ckeycount=0,无需进行后面的检验 return HWLM_CONTINUE_MATCHING; } u64a end = scratch->tctxt.lastCombMatchOffset; @@ -2278,7 +2278,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); assert(ri->done_jump); // must progress - pc += ri->done_jump; + pc += ri->done_jump;//com pc这么直接相加,是地址,而不是pc的值相加 PROGRAM_NEXT_INSTRUCTION_JUMP } } @@ -2690,7 +2690,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, enum DedupeResult rv = dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust, ri->dkey, ri->offset_adjust, - is_external_report, ri->quash_som, do_som); + is_external_report, ri->quash_som, do_som);//com 猜测:检查去重结果 switch (rv) { case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; diff --git a/src/runtime.c b/src/runtime.c index 98aa587fb..5114416c4 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -349,7 +349,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_SUCCESS; } - prefetch_data(data, length); + prefetch_data(data, length);//com 好像啥也没做,宏定义是空的 /* populate core info in scratch */ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, diff --git a/src/util/multibit.h b/src/util/multibit.h index d02fd95b1..bd9b1a50d 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -345,8 
+345,8 @@ void mmbit_clear(u8 *bits, u32 total_bits) { /** \brief Specialisation of \ref mmbit_set for flat models. */ static really_inline char mmbit_set_flat(u8 *bits, u32 total_bits, u32 key) { - bits += mmbit_flat_select_byte(key, total_bits); - u8 mask = 1U << (key % 8); + bits += mmbit_flat_select_byte(key, total_bits);//com 位图bitmap某一个字节 + u8 mask = 1U << (key % 8);//com 字节内的某一比特位 char was_set = !!(*bits & mask); *bits |= mask; return was_set; From 2d62340d2dbd74a26a6bd37daaa483974b13dc01 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sun, 22 Sep 2024 21:20:04 +0800 Subject: [PATCH 06/16] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86core=5Finfo?= =?UTF-8?q?=E4=B8=AD=E7=9A=84hitoffset=E7=9A=84=E5=86=85=E5=AD=98=E5=88=86?= =?UTF-8?q?=E9=85=8D=E8=BF=87=E7=A8=8B=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E9=83=A8=E5=88=86=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 12 ++++++------ src/parser/logical_combination.cpp | 12 ++++++------ src/report.h | 4 ++-- src/rose/program_runtime.c | 4 ++-- src/rose/rose_build_bytecode.cpp | 2 +- src/rose/rose_build_compile.cpp | 2 +- src/rose/rose_build_program.cpp | 2 +- src/rose/rose_build_program.h | 2 +- src/rose/rose_internal.h | 2 ++ src/runtime.c | 2 +- src/scratch.c | 23 ++++++++++++++++------- src/scratch.h | 8 +++++++- src/util/multibit.h | 2 +- src/util/report_manager.h | 2 +- 14 files changed, 48 insertions(+), 31 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index 37c2bf9db..2f83dcb1d 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -61,7 +61,8 @@ #include -#define PATTERN_COUNT 5 +#define PATTERN_COUNT 6 + const char *patterns[PATTERN_COUNT] = {"aaa", "bbb", "11 & 12","ccc"," 12 & 14 & 11","ddd"}; /** * This is the function that will be called for each match that occurs. 
@a ctx @@ -71,9 +72,11 @@ */ static int eventHandler(unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *ctx) { - printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to); + printf("Match for pattern id = %d,expr =%s at offset %llu\n", id,patterns[id-11], to); return 0; } + unsigned int ids[PATTERN_COUNT] = {11, 12, 13,14,15,16}; + unsigned int flags[PATTERN_COUNT] = {HS_FLAG_SINGLEMATCH,0,HS_FLAG_COMBINATION,0,HS_FLAG_COMBINATION,0}; /** * Fill a data buffer from the given filename, returning it and filling @a @@ -146,16 +149,13 @@ static char *readInputData(const char *inputFN, unsigned int *length) { return inputData; } + int main(int argc, char *argv[]) { if (argc != 3) { fprintf(stderr, "Usage: %s \n", argv[0]); return -1; } - const char *patterns[PATTERN_COUNT] = {"aaa", "bbb", "1 & 2","ccc","2 & 4"}; - unsigned int ids[PATTERN_COUNT] = {1, 2, 3,4,5}; - unsigned int flags[PATTERN_COUNT] = {HS_FLAG_SINGLEMATCH,0,HS_FLAG_COMBINATION,0,HS_FLAG_COMBINATION}; - hs_expr_ext_t e1; e1.flags = HS_EXT_FLAG_MIN_OFFSET|HS_EXT_FLAG_MAX_DEPTH; e1.min_offset = 0; diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index 96c3bd89d..388ec0aca 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -42,7 +42,7 @@ using namespace std; namespace ue2 { u32 ParsedLogical::getLogicalKey(u32 a) { - auto it = toLogicalKeyMap.find(a); + auto it = toLogicalKeyMap.find(a);//一个subid可能被多个comid使用,所以后面的comid寻找的时候,subid可能已经被前面的comid插入了 if (it == toLogicalKeyMap.end()) { // get size before assigning to avoid wacky LHS shenanigans u32 size = toLogicalKeyMap.size(); @@ -88,7 +88,7 @@ do { \ u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) { LogicalOp lop; assert((LOGICAL_OP_BIT & (u32)logicalTree.size()) == 0); - lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size(); + lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size();//com 产生一个新的id,并且返回 lop.op = op; lop.lo = left; 
lop.ro = right; @@ -252,14 +252,14 @@ void popOperator(vector &op_stack, vector &subid_stack, left = subid_stack.back(); subid_stack.pop_back(); } - subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right)); + subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right));//com 将left和right生成一个小tree,并返回treeid,并push到subid_stack中,等待和下一个subid组合再生成小tree op_stack.pop_back(); } void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, u32 ekey, u64a min_offset, u64a max_offset) { - u32 ckey = getCombKey(id); + u32 ckey = getCombKey(id);//com 插入comid之前,已有多少个comid被插入 vector op_stack; vector subid_stack; u32 lkey_start = INVALID_LKEY; // logical operation's lkey @@ -296,7 +296,7 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, && cmpOperator(op_stack.back(), op)) { popOperator(op_stack, subid_stack, *this); if (lkey_start == INVALID_LKEY) { - lkey_start = subid_stack.back(); + lkey_start = subid_stack.back();// com 生成的第一个treeid,作为开始的treeid } } op_stack.push_back(op); @@ -326,7 +326,7 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, error.locate(i); throw; } - u32 lkey_result = subid_stack.back(); // logical operation's lkey + u32 lkey_result = subid_stack.back(); // logical operation's lkey com 最后生成的treeid,作为结束的treeid if (lkey_start == INVALID_LKEY) { throw CompileError("No logical operation."); } diff --git a/src/report.h b/src/report.h index 7678f3542..7089951f4 100644 --- a/src/report.h +++ b/src/report.h @@ -325,14 +325,14 @@ int roseDeliverReport(u64a offset, ReportID onmatch, s32 offset_adjust, int halt = ci->userCallback(onmatch, from_offset, to_offset, flags, ci->userContext); - if (halt) { + if (halt) {//com 检查回调函数的返回值,判断是否应该退出匹配 DEBUG_PRINTF("callback requested to terminate matches\n"); ci->status |= STATUS_TERMINATED; return MO_HALT_MATCHING; } if (ekey != INVALID_EKEY) { - markAsMatched(ci->rose, ci->exhaustionVector, ekey); + markAsMatched(ci->rose, 
ci->exhaustionVector, ekey);//com 标记ekey,后续再次命中时候检查ekey,如果已经被标记则不触发回调 return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 1756464be..fc75e4bc0 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2027,7 +2027,7 @@ static rose_inline hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, struct hs_scratch *scratch) {//com 这里也有min或者max offset的校验,猜想是命中了subid,判断是否激活combinationID u8 *cvec = (u8 *)scratch->core_info.combVector; - if (!mmbit_any(cvec, t->ckeyCount)) {//如果没有编译任何com,则ckeycount=0,无需进行后面的检验 + if (!mmbit_any(cvec, t->ckeyCount)) {//如果没有设置任何cvec,直接返回 return HWLM_CONTINUE_MATCHING; } u64a end = scratch->tctxt.lastCombMatchOffset; @@ -3034,7 +3034,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(ri->ckey != INVALID_CKEY); assert(ri->ckey < t->ckeyCount); char *cvec = scratch->core_info.combVector; - setCombinationActive(t, cvec, ri->ckey); + setCombinationActive(t, cvec, ri->ckey);// 把有可能触发激活的comid全部设置1 } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index df464c280..e68f1d9bf 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -383,7 +383,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, u32 longLitStreamStateRequired, u32 historyRequired, - RoseStateOffsets *so) { + RoseStateOffsets *so) {//com 计算scratch需要的分配的bstate大小 u32 curr_offset = 0; // First, runtime status (stores per-stream state, like whether we need a diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 1cf3bbe69..e02056b79 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1679,7 +1679,7 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } #endif // NDEBUG -bytecode_ptr 
RoseBuildImpl::buildRose(u32 minWidth) { +bytecode_ptr RoseBuildImpl::buildRose(u32 minWidth) {//com 编译表达式过程中构造rose的关键函数 dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 96c95dbf0..2ca19be98 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -523,7 +523,7 @@ void addLogicalSetRequired(const Report &report, ReportManager &rm, static void makeReport(const RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { + const bool has_som, RoseProgram &program) {//com 猜想是构造状态机的过程 assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 7d781f319..a9b35a6c4 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -52,7 +52,7 @@ struct RoseResources; */ class RoseProgram { private: - std::vector> prog; + std::vector> prog;//com 猜想是存储状态机的vector public: RoseProgram(); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 7bd6779c3..302b5e374 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -210,6 +210,8 @@ struct RoseStateOffsets { /** size in bytes of logical multibit */ u32 logicalVec_size; + u32 hitLog; + u32 hitLogSize; /** Combination multibit. * * entry per combination key (used by Logical Combination). */ diff --git a/src/runtime.c b/src/runtime.c index 5114416c4..a715660d3 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -404,7 +404,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, // Apply the small write engine if and only if the block (buffer) is // small enough. Otherwise, we allow rose &co to deal with it. 
- if (length < smwr->largestBuffer) { + if (length < smwr->largestBuffer) {//com buffer长度小于35进这里 DEBUG_PRINTF("Attempting small write of block %u bytes long.\n", length); runSmallWriteEngine(smwr, scratch); diff --git a/src/scratch.c b/src/scratch.c index 9f6d77cdc..a5aa73f5f 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -86,7 +86,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); u32 som_now_size = proto->som_fatbit_size; u32 som_attempted_size = proto->som_fatbit_size; - + + u32 hitLogSize = proto->logicalKeyCount * sizeof(struct hitOffset); struct hs_scratch *s; struct hs_scratch *s_tmp; size_t queue_size = queueCount * sizeof(struct mq); @@ -98,7 +99,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { anchored_literal_region_len, proto->anchored_literal_fatbit_size); size_t delay_region_size = fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size); - // the size is all the allocated stuff, not including the struct itself size_t size = queue_size + 63 + bStateSize + tStateSize @@ -113,7 +113,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + som_store_size + som_now_size + som_attempted_size - + som_attempted_store_size + 15; + + som_attempted_store_size + 15+hitLogSize; /* the struct plus the allocated stuff plus padding for cacheline * alignment */ @@ -138,7 +138,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->scratch_alloc = (char *)s_tmp; s->fdr_conf = NULL; - // each of these is at an offset from the previous + // each of these is at an offset from the previous //com 注意,current指向s的后面 char *current = (char *)s + sizeof(*s); // align current so that the following arrays are naturally aligned: this @@ -227,6 +227,13 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->fullStateSize = fullStateSize; current += 
fullStateSize; + current = ROUNDUP_PTR(current, alignof(struct hitOffset *)); + s->core_info.hit_log = (struct hitOffset**)current; + current += sizeof(struct hitOffset *) *s->logicalKeyCount; + for (u32 i = 0; i < s->logicalKeyCount; i++) { + s->core_info.hit_log[i] = (struct hitOffset *)current; + current += sizeof(struct hitOffset); + } *scratch = s; // Don't get too big for your boots @@ -275,7 +282,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, int resize = 0; hs_scratch_t *proto; - hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256); + hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);//多分配了256,所以下面找64位对齐的时候,可以分配给proto hs_error_t proto_ret = hs_check_alloc(proto_tmp); if (proto_ret != HS_SUCCESS) { hs_scratch_free(proto_tmp); @@ -286,7 +293,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, return proto_ret; } - proto = ROUNDUP_PTR(proto_tmp, 64); + proto = ROUNDUP_PTR(proto_tmp, 64);//com 向后找一个64位对齐的位置 if (*scratch) { *proto = **scratch; @@ -367,7 +374,9 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, proto->deduper.dkey_count = rose->dkeyCount; proto->deduper.log_size = rose->dkeyLogSize; } - + if(rose->lkeyCount>0){ + proto->logicalKeyCount = rose->lkeyCount; + } if (resize) { if (*scratch) { hs_scratch_free((*scratch)->scratch_alloc); diff --git a/src/scratch.h b/src/scratch.h index e3cd92452..bf1cac56e 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -87,6 +87,10 @@ struct catchup_pq { /** \brief Status flag: Unexpected Rose program error. */ #define STATUS_ERROR (1U << 3) +struct hitOffset{ + u64a first; + u64a last; +}; /** \brief Core information about the current scan, used everywhere. */ struct core_info { void *userContext; /**< user-supplied context */ @@ -107,6 +111,7 @@ struct core_info { size_t hlen; /**< length of history buffer in bytes. 
*/ u64a buf_offset; /**< stream offset, for the base of the buffer */ u8 status; /**< stream status bitmask, using STATUS_ flags above */ + struct hitOffset **hit_log; }; /** \brief Rose state information. */ @@ -177,10 +182,11 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 bStateSize; /**< sizeof block mode states */ u32 tStateSize; /**< sizeof transient rose states */ u32 fullStateSize; /**< size of uncompressed nfa state */ + u32 logicalKeyCount; /**< number of logical keys */ struct RoseContext tctxt; char *bstate; /**< block mode states */ char *tstate; /**< state for transient roses */ - char *fullState; /**< uncompressed NFA state */ +char *fullState; /**< uncompressed NFA state */ struct mq *queues; struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid * & active */ diff --git a/src/util/multibit.h b/src/util/multibit.h index bd9b1a50d..c8d19a233 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -391,7 +391,7 @@ char mmbit_isset(const u8 *bits, u32 total_bits, u32 key); /** \brief Sets the given key in the multibit. Returns 0 if the key was NOT * already set, 1 otherwise. */ static really_inline -char mmbit_set(u8 *bits, u32 total_bits, u32 key) { +char mmbit_set(u8 *bits, u32 total_bits, u32 key) {//com totalbits<256时,不起作用 MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key); char status = mmbit_set_i(bits, total_bits, key);//com 卧槽阿 位图bitmap MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status); diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 015dc9c85..705989684 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -170,7 +170,7 @@ class ReportManager : noncopyable { std::unordered_map externalIdMap; /** \brief Mapping from expression index to exhaustion key. */ - std::map toExhaustibleKeyMap; + std::map toExhaustibleKeyMap;//com 设置了HS_FLAG_SINGLEMATCH /** \brief Unallocated expression index, used for \ref * getUnassociatedExhaustibleKey. 
From 43f29751ba66b40afb3396dab9f8eb5208932a86 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Mon, 23 Sep 2024 11:22:58 +0800 Subject: [PATCH 07/16] =?UTF-8?q?=E5=91=BD=E4=B8=ADlogical=20ID=20?= =?UTF-8?q?=E7=9A=84=E6=97=B6=E5=80=99=EF=BC=8C=E6=9B=B4=E6=96=B0offset?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/report.h | 8 ++++++++ src/rose/program_runtime.c | 1 + 2 files changed, 9 insertions(+) diff --git a/src/report.h b/src/report.h index 7089951f4..845e33ef0 100644 --- a/src/report.h +++ b/src/report.h @@ -179,6 +179,14 @@ void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey, break; } } +static really_inline void setLogicalOffset(struct hs_scratch *scratch, u32 lkey, + u64a end) { + DEBUG_PRINTF("set hitOffset logical key %u,offset = %u\n", lkey, end); + if (scratch->core_info.hit_log[lkey]->first == 0) { + scratch->core_info.hit_log[lkey]->first = end; + } + scratch->core_info.hit_log[lkey]->last = end; +} /** \brief Mark key \a ckey on in the combination vector. 
*/ static really_inline diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index fc75e4bc0..f0176dc72 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -3025,6 +3025,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(ri->lkey < t->lkeyCount); char *lvec = scratch->core_info.logicalVector; setLogicalVal(t, lvec, ri->lkey, 1); + setLogicalOffset(scratch, ri->lkey, end + ri->offset_adjust); updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); } PROGRAM_NEXT_INSTRUCTION From 75d3a33f9ed1ff2bd48a844fd69d3f0c1fb84172 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Mon, 23 Sep 2024 16:40:43 +0800 Subject: [PATCH 08/16] =?UTF-8?q?=E5=9C=A8=E9=80=BB=E8=BE=91=E7=BB=84?= =?UTF-8?q?=E5=90=88=E4=B8=AD=E6=94=AF=E6=8C=81=E6=9C=80=E5=A4=A7=E6=B7=B1?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/compiler/compiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 154c01dba..f7b27037a 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -321,6 +321,12 @@ void addExpression(NG &ng, unsigned index, const char *expression, if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { max_offset = ext->max_offset; } + if (ext->flags & HS_EXT_FLAG_MAX_DEPTH) { + if (!(ext->flags & HS_EXT_FLAG_MAX_OFFSET) || + ext->max_depth < max_offset) { + max_offset = ext->max_depth; + } + } } ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset, max_offset); From aa834f9afb2c1f5735eacfdab0d4c55b7de6c65f Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Tue, 24 Sep 2024 09:49:01 +0800 Subject: [PATCH 09/16] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E9=80=9A=E8=BF=87=E9=80=BB=E8=BE=91=E7=BB=84=E5=90=88=E5=85=88?= =?UTF-8?q?=E5=90=8E=E9=A1=BA=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- mytest/test1.c | 14 +++++++++--- src/compiler/compiler.cpp | 16 ++++++++++--- src/hs_compile.h | 10 ++++++++- src/parser/logical_combination.cpp | 24 +++++++++++++++++++- src/parser/logical_combination.h | 2 +- src/report.h | 36 ++++++++++++++++++++++++++++-- src/rose/program_runtime.c | 6 ++++- src/runtime.c | 1 + src/util/logical.h | 4 +++- 9 files changed, 100 insertions(+), 13 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index 2f83dcb1d..edad44af9 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -159,11 +159,19 @@ int main(int argc, char *argv[]) { hs_expr_ext_t e1; e1.flags = HS_EXT_FLAG_MIN_OFFSET|HS_EXT_FLAG_MAX_DEPTH; e1.min_offset = 0; - e1.max_depth=100; - + e1.max_depth=30; + + hs_expr_ext_t e2; + e2.flags= HS_EXT_FLAG_COMBINATION_PRIORITY; + e2.combinationPriorityCount=1; + hs_combination_subid_priority_t p1; + p1.frontID=11; + p1.backID=12; + p1.distance=5; + e2.combinationPriority[0]=p1; const hs_expr_ext_t **exts= malloc(PATTERN_COUNT * sizeof(hs_expr_ext_t *)); exts[0] = &e1; - + exts[2] = &e2; char *inputFN = argv[2]; if (access(inputFN, F_OK) != 0) { diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index f7b27037a..b89105409 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -83,7 +83,8 @@ void validateExt(const hs_expr_ext &ext) { HS_EXT_FLAG_MIN_LENGTH | HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_HAMMING_DISTANCE| - HS_EXT_FLAG_MAX_DEPTH; + HS_EXT_FLAG_MAX_DEPTH| + HS_EXT_FLAG_COMBINATION_PRIORITY; if (ext.flags & ~ALL_EXT_FLAGS) { throw CompileError("Invalid hs_expr_ext flag set."); } @@ -161,6 +162,10 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, throw CompileError("HS_FLAG_QUIET is not supported in " "combination with HS_FLAG_SOM_LEFTMOST."); } + if (flags & HS_EXT_FLAG_COMBINATION_PRIORITY){ + throw CompileError("HS_EXT_FLAG_COMBINATION_PRIORITY is just supported in " + "combination with HS_FLAG_COMBINATION."); + } flags &= ~HS_FLAG_QUIET; ParseMode 
mode(flags); @@ -308,8 +313,10 @@ void addExpression(NG &ng, unsigned index, const char *expression, } if (ext) { validateExt(*ext); - if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET | - HS_EXT_FLAG_MAX_OFFSET)) { + if (ext->flags & + ~(HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | + HS_EXT_FLAG_MAX_DEPTH | + HS_EXT_FLAG_COMBINATION_PRIORITY)) { throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and " "HS_EXT_FLAG_MAX_OFFSET extra flags " "are supported in combination " @@ -330,6 +337,9 @@ void addExpression(NG &ng, unsigned index, const char *expression, } ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset, max_offset); + if (ext && (ext->flags & HS_EXT_FLAG_COMBINATION_PRIORITY)) { + ng.rm.pl.addPriority(id, ext); + } DEBUG_PRINTF("parsed logical combination expression %u\n", id); } return; diff --git a/src/hs_compile.h b/src/hs_compile.h index 9ab1e7a5b..c71646dce 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -45,7 +45,11 @@ extern "C" { #endif - +typedef struct hs_combination_subid_priority{ + unsigned int frontID; + unsigned int backID; + unsigned int distance; +} hs_combination_subid_priority_t; /** * A type containing error details that is returned by the compile calls (@ref * hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on @@ -266,6 +270,8 @@ typedef struct hs_expr_ext { */ unsigned hamming_distance; unsigned max_depth; + unsigned combinationPriorityCount; + hs_combination_subid_priority_t *combinationPriority; } hs_expr_ext_t; /** @@ -295,6 +301,8 @@ typedef struct hs_expr_ext { /** Flag indicating that the hs_expr_ext::max_depth field is used. 
*/ #define HS_EXT_FLAG_MAX_DEPTH 32ULL +#define HS_EXT_FLAG_COMBINATION_PRIORITY 64ULL + /** @} */ /** diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index 388ec0aca..30b5d09c0 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -107,6 +107,8 @@ void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey, ci.result = lkey_result; ci.min_offset = min_offset; ci.max_offset = max_offset; + ci.combinationPriority = NULL; + ci.combinationPriorityCount = 0; combInfoMap.push_back(ci); DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, ci.id, @@ -157,7 +159,7 @@ void ParsedLogical::validateSubIDs(const unsigned *ids, } } } - +//com 对combInfoMap中的start和result重新编号 void ParsedLogical::logicalKeyRenumber() { // renumber operation lkey in op vector for (auto &op : logicalTree) { @@ -333,5 +335,25 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result, min_offset, max_offset); } +void ParsedLogical::addPriority(u32 id, const hs_expr_ext *ext) { + assert(ext->flags & HS_EXT_FLAG_COMBINATION_PRIORITY); + auto it = toCombKeyMap.find(id); + assert(it != toCombKeyMap.end()); + u32 ckey = it->second; + assert(ckey < combInfoMap.size()); + CombInfo &ci = combInfoMap[ckey]; + ci.combinationPriorityCount = ext->combinationPriorityCount; + ci.combinationPriority = (hs_combination_subid_priority_t *)malloc( + sizeof(hs_combination_subid_priority_t) * + ext->combinationPriorityCount); + for (u32 i = 0; i < ext->combinationPriorityCount; i++) { + ci.combinationPriority[i].frontID = + toLogicalKeyMap.find((ext->combinationPriority[i].frontID))->second; + ci.combinationPriority[i].backID = + toLogicalKeyMap.find((ext->combinationPriority[i].backID))->second; + ci.combinationPriority[i].distance = + ext->combinationPriority[i].distance; + } +} } // namespace ue2 diff --git a/src/parser/logical_combination.h 
b/src/parser/logical_combination.h index 7c8eb36ef..7b91fca4d 100644 --- a/src/parser/logical_combination.h +++ b/src/parser/logical_combination.h @@ -87,7 +87,7 @@ class ParsedLogical { assert(ckey < combInfoMap.size()); return combInfoMap.at(ckey); } - + void addPriority(u32 id,const hs_expr_ext_t *); private: /** \brief Mapping from ckey to combination info. */ std::vector combInfoMap; diff --git a/src/report.h b/src/report.h index 845e33ef0..0a12f53f8 100644 --- a/src/report.h +++ b/src/report.h @@ -181,7 +181,7 @@ void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey, } static really_inline void setLogicalOffset(struct hs_scratch *scratch, u32 lkey, u64a end) { - DEBUG_PRINTF("set hitOffset logical key %u,offset = %u\n", lkey, end); + DEBUG_PRINTF("set hitOffset logical key %u,offset = %llu\n", lkey, end); if (scratch->core_info.hit_log[lkey]->first == 0) { scratch->core_info.hit_log[lkey]->first = end; } @@ -196,7 +196,29 @@ void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) { assert(ckey < rose->ckeyCount); mmbit_set((u8 *)cvec, rose->ckeyCount, ckey); } - +static really_inline int +checkCombinationPriority(const struct CombInfo *ci, + const struct core_info *core_info) { + if (!ci->combinationPriorityCount) { + return 1; + } + for (u32 i = 0; i < ci->combinationPriorityCount; i++) { + u32 frontID = ci->combinationPriority[i].frontID; + u32 backID = ci->combinationPriority[i].backID; + u32 distance = ci->combinationPriority[i].distance; + if (core_info->hit_log[backID]->last - + core_info->hit_log[frontID]->first < + distance) { + DEBUG_PRINTF("combination priority not match,front lkey = %u,back " + "lkey = %u,expected more than %u,acutal distance is %llu\n", + frontID, backID, distance, + core_info->hit_log[backID]->last - + core_info->hit_log[frontID]->first); + return 0; + } + } + return 1; +} /** \brief Returns 1 if compliant to all logical combinations. 
*/ static really_inline char isLogicalCombination(const struct RoseEngine *rose, char *lvec, @@ -291,6 +313,16 @@ void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) { mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount); mmbit_clear((u8 *)cvec, rose->ckeyCount); } +static really_inline +void clearHitLog(const struct hs_scratch *scratch) { + + DEBUG_PRINTF("clearing hitlog size = %u\n", + scratch->logicalKeyCount); + for (u32 i = 0; i < scratch->logicalKeyCount; i++) { + scratch->core_info.hit_log[i]->first = 0; + scratch->core_info.hit_log[i]->last = 0; + } +} /** \brief Clear all keys in the combination vector. */ static really_inline diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index f0176dc72..94a24a605 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -2062,8 +2062,12 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, DEBUG_PRINTF("Logical Combination Failed!\n"); continue; } - + if (!checkCombinationPriority(ci,&(scratch->core_info))){ + DEBUG_PRINTF("Combination Priority Failed!\n"); + continue; + } DEBUG_PRINTF("Logical Combination Passed!\n");// com 重点突破口 + //检查ci中的priority是否满足 if (roseReportComb(t, scratch, end, ci->id, 0, ci->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; diff --git a/src/runtime.c b/src/runtime.c index a715660d3..ee5bb1696 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -364,6 +364,7 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, scratch->tctxt.lastCombMatchOffset = 0; clearLvec(rose, scratch->core_info.logicalVector, scratch->core_info.combVector); + clearHitLog(scratch); } if (!length) { diff --git a/src/util/logical.h b/src/util/logical.h index 0c8b6469a..668b3070a 100644 --- a/src/util/logical.h +++ b/src/util/logical.h @@ -34,7 +34,7 @@ #define LOGICAL_H #include "ue2common.h" - +#include "hs_compile.h" /** Index meaning a given logical key is invalid. 
*/ #define INVALID_LKEY (~(u32)0) #define INVALID_CKEY INVALID_LKEY @@ -67,6 +67,8 @@ struct CombInfo { u32 result; //!< ckey of logical operation to give final result u64a min_offset; u64a max_offset; + u32 combinationPriorityCount; + hs_combination_subid_priority_t * combinationPriority; }; /** Temporarily use to seperate operations' id from reports' lkey From 78cea2810615b29db4c5d0a85a8a49cdcd497ead Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Tue, 24 Sep 2024 11:26:37 +0800 Subject: [PATCH 10/16] =?UTF-8?q?bug:=E5=8D=95priotiity=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=BA=E5=A4=9Apriority?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 20 +++++++++++++++++--- src/compiler/compiler.cpp | 4 +++- src/hs_compile.h | 2 +- src/parser/logical_combination.cpp | 16 +++++++++------- src/report.h | 6 +++--- src/util/logical.h | 2 +- 6 files changed, 34 insertions(+), 16 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index edad44af9..a993bf548 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -163,15 +163,21 @@ int main(int argc, char *argv[]) { hs_expr_ext_t e2; e2.flags= HS_EXT_FLAG_COMBINATION_PRIORITY; - e2.combinationPriorityCount=1; + e2.combinationPriorityCount=2; + e2.combinationPriority = malloc(sizeof(hs_combination_subid_priority_t) * 2); hs_combination_subid_priority_t p1; p1.frontID=11; p1.backID=12; p1.distance=5; - e2.combinationPriority[0]=p1; + hs_combination_subid_priority_t p2; + p2.frontID = 12; + p2.backID = 14; + p2.distance = 5; + e2.combinationPriority[0]=&p1; + e2.combinationPriority[1]=&p2; const hs_expr_ext_t **exts= malloc(PATTERN_COUNT * sizeof(hs_expr_ext_t *)); exts[0] = &e1; - exts[2] = &e2; + exts[4] = &e2; char *inputFN = argv[2]; if (access(inputFN, F_OK) != 0) { @@ -217,6 +223,14 @@ int main(int argc, char *argv[]) { hs_free_database(database); return -1; } + if (hs_scan(database, inputData, length, 0, scratch, eventHandler, NULL) != + HS_SUCCESS) { 
+ fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n"); + hs_free_scratch(scratch); + free(inputData); + hs_free_database(database); + return -1; + } /* Scanning is complete, any matches have been handled, so now we just * clean up and exit. diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index b89105409..df24604f9 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -317,7 +317,9 @@ void addExpression(NG &ng, unsigned index, const char *expression, ~(HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | HS_EXT_FLAG_MAX_DEPTH | HS_EXT_FLAG_COMBINATION_PRIORITY)) { - throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and " + throw CompileError("only HS_EXT_FLAG_MIN_OFFSET ," + "HS_EXT_FLAG_MAX_DEPTH," + "HS_EXT_FLAG_COMBINATION_PRIORITY and " "HS_EXT_FLAG_MAX_OFFSET extra flags " "are supported in combination " "with HS_FLAG_COMBINATION."); diff --git a/src/hs_compile.h b/src/hs_compile.h index c71646dce..11b0cec66 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -271,7 +271,7 @@ typedef struct hs_expr_ext { unsigned hamming_distance; unsigned max_depth; unsigned combinationPriorityCount; - hs_combination_subid_priority_t *combinationPriority; + hs_combination_subid_priority_t **combinationPriority; } hs_expr_ext_t; /** diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index 30b5d09c0..c918eae40 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -343,16 +343,18 @@ void ParsedLogical::addPriority(u32 id, const hs_expr_ext *ext) { assert(ckey < combInfoMap.size()); CombInfo &ci = combInfoMap[ckey]; ci.combinationPriorityCount = ext->combinationPriorityCount; - ci.combinationPriority = (hs_combination_subid_priority_t *)malloc( + ci.combinationPriority = (hs_combination_subid_priority_t **)malloc( sizeof(hs_combination_subid_priority_t) * ext->combinationPriorityCount); for (u32 i = 0; i < ext->combinationPriorityCount; i++) { - 
ci.combinationPriority[i].frontID = - toLogicalKeyMap.find((ext->combinationPriority[i].frontID))->second; - ci.combinationPriority[i].backID = - toLogicalKeyMap.find((ext->combinationPriority[i].backID))->second; - ci.combinationPriority[i].distance = - ext->combinationPriority[i].distance; + ci.combinationPriority[i] = (hs_combination_subid_priority_t *)malloc( + sizeof(hs_combination_subid_priority_t)); + ci.combinationPriority[i]->frontID = + toLogicalKeyMap.find((ext->combinationPriority[i]->frontID))->second; + ci.combinationPriority[i]->backID = + toLogicalKeyMap.find((ext->combinationPriority[i]->backID))->second; + ci.combinationPriority[i]->distance = + ext->combinationPriority[i]->distance; } } diff --git a/src/report.h b/src/report.h index 0a12f53f8..149267e1b 100644 --- a/src/report.h +++ b/src/report.h @@ -203,9 +203,9 @@ checkCombinationPriority(const struct CombInfo *ci, return 1; } for (u32 i = 0; i < ci->combinationPriorityCount; i++) { - u32 frontID = ci->combinationPriority[i].frontID; - u32 backID = ci->combinationPriority[i].backID; - u32 distance = ci->combinationPriority[i].distance; + u32 frontID = ci->combinationPriority[i]->frontID; + u32 backID = ci->combinationPriority[i]->backID; + u32 distance = ci->combinationPriority[i]->distance; if (core_info->hit_log[backID]->last - core_info->hit_log[frontID]->first < distance) { diff --git a/src/util/logical.h b/src/util/logical.h index 668b3070a..fa1938ea5 100644 --- a/src/util/logical.h +++ b/src/util/logical.h @@ -68,7 +68,7 @@ struct CombInfo { u64a min_offset; u64a max_offset; u32 combinationPriorityCount; - hs_combination_subid_priority_t * combinationPriority; + hs_combination_subid_priority_t ** combinationPriority; }; /** Temporarily use to seperate operations' id from reports' lkey From 554d75a0b713ea2c43357db2f0b5634c28c2f166 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Tue, 24 Sep 2024 15:56:07 +0800 Subject: [PATCH 11/16] =?UTF-8?q?=E5=B0=86hitlog=E7=9A=84=E7=94=B3?= 
=?UTF-8?q?=E8=AF=B7=E5=86=85=E5=AD=98=E8=BF=87=E7=A8=8B=E4=BB=8Escratch?= =?UTF-8?q?=E8=BD=AC=E7=A7=BB=E5=88=B0rose?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/rose/rose_internal.h | 5 +++++ src/runtime.c | 2 +- src/scratch.c | 14 ++------------ src/scratch.h | 5 +---- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 302b5e374..75f22a64c 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -482,6 +482,7 @@ struct RoseEngine { u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; + struct hitOffset **hit_log; }; struct ALIGN_CL_DIRECTIVE anchored_matcher_info { @@ -556,6 +557,10 @@ struct RoseLongLitHashEntry { u32 str_len; }; +struct hitOffset{ + u64a first; + u64a last; +}; static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/src/runtime.c b/src/runtime.c index ee5bb1696..6c0139577 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -135,7 +135,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->core_info.hbuf = history; s->core_info.hlen = hlen; s->core_info.buf_offset = offset; - + s->core_info.hit_log = rose->hit_log; /* and some stuff not actually in core info */ s->som_set_now_offset = ~0ULL; s->deduper.current_report_offset = ~0ULL; diff --git a/src/scratch.c b/src/scratch.c index a5aa73f5f..020065839 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -87,7 +87,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 som_now_size = proto->som_fatbit_size; u32 som_attempted_size = proto->som_fatbit_size; - u32 hitLogSize = proto->logicalKeyCount * sizeof(struct hitOffset); struct hs_scratch *s; struct hs_scratch *s_tmp; size_t queue_size = queueCount * sizeof(struct mq); @@ -113,7 +112,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + 
som_store_size + som_now_size + som_attempted_size - + som_attempted_store_size + 15+hitLogSize; + + som_attempted_store_size + 15; /* the struct plus the allocated stuff plus padding for cacheline * alignment */ @@ -227,13 +226,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->fullStateSize = fullStateSize; current += fullStateSize; - current = ROUNDUP_PTR(current, alignof(struct hitOffset *)); - s->core_info.hit_log = (struct hitOffset**)current; - current += sizeof(struct hitOffset *) *s->logicalKeyCount; - for (u32 i = 0; i < s->logicalKeyCount; i++) { - s->core_info.hit_log[i] = (struct hitOffset *)current; - current += sizeof(struct hitOffset); - } *scratch = s; // Don't get too big for your boots @@ -374,9 +366,7 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, proto->deduper.dkey_count = rose->dkeyCount; proto->deduper.log_size = rose->dkeyLogSize; } - if(rose->lkeyCount>0){ - proto->logicalKeyCount = rose->lkeyCount; - } + if (resize) { if (*scratch) { hs_scratch_free((*scratch)->scratch_alloc); diff --git a/src/scratch.h b/src/scratch.h index bf1cac56e..a3e48a73d 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -87,10 +87,7 @@ struct catchup_pq { /** \brief Status flag: Unexpected Rose program error. */ #define STATUS_ERROR (1U << 3) -struct hitOffset{ - u64a first; - u64a last; -}; + /** \brief Core information about the current scan, used everywhere. 
*/ struct core_info { void *userContext; /**< user-supplied context */ From 303e6ff0ef64780e93def0c99b52eda87d16776b Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Tue, 24 Sep 2024 16:42:44 +0800 Subject: [PATCH 12/16] =?UTF-8?q?=E5=90=8C=E4=B8=8A=E4=B8=80=E6=AC=A1commi?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/rose/rose_build_bytecode.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e68f1d9bf..c6447cf8e 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3605,6 +3605,13 @@ map makeLeftQueueMap(const RoseGraph &g, return lqm; } +static void allocHitLog(RoseEngine &proto){ + proto.hit_log = (struct hitOffset **)malloc(sizeof(struct hitOffset *) * proto.lkeyCount); + for (u32 i = 0; i < proto.lkeyCount; i++) { + proto.hit_log[i] = (struct hitOffset *)malloc(sizeof(struct hitOffset)); + memset(proto.hit_log[i], 0, sizeof(struct hitOffset) ); + } +} bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // We keep all our offsets, counts etc. in a prototype RoseEngine which we @@ -3868,7 +3875,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.ematcherRegionSize = ematcher_region_size; proto.size = currOffset; - + allocHitLog(proto); // Time to allocate the real RoseEngine structure, at cacheline alignment. auto engine = make_zeroed_bytecode_ptr(currOffset, 64); assert(engine); // will have thrown bad_alloc otherwise. 
@@ -3893,4 +3900,5 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { return engine; } + } // namespace ue2 From 39a29925c0bb30adcea315755294dbd2f423031c Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Tue, 24 Sep 2024 17:16:39 +0800 Subject: [PATCH 13/16] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86checkCombinati?= =?UTF-8?q?onPriority=E4=B8=AD=EF=BC=8Cpass=E6=83=85=E5=86=B5=E4=B8=8B?= =?UTF-8?q?=E7=9A=84=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/report.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/report.h b/src/report.h index 149267e1b..ec14b5ad6 100644 --- a/src/report.h +++ b/src/report.h @@ -209,13 +209,20 @@ checkCombinationPriority(const struct CombInfo *ci, if (core_info->hit_log[backID]->last - core_info->hit_log[frontID]->first < distance) { - DEBUG_PRINTF("combination priority not match,front lkey = %u,back " - "lkey = %u,expected more than %u,acutal distance is %llu\n", - frontID, backID, distance, - core_info->hit_log[backID]->last - - core_info->hit_log[frontID]->first); + DEBUG_PRINTF( + "combination priority failed,front lkey = %u,back " + "lkey = %u,expected more than %u,acutal distance is %llu\n", + frontID, backID, distance, + core_info->hit_log[backID]->last - + core_info->hit_log[frontID]->first); return 0; } + DEBUG_PRINTF( + "combination priority pass,front lkey = %u,back " + "lkey = %u,expected more than %u,acutal distance is %llu\n", + frontID, backID, distance, + core_info->hit_log[backID]->last - + core_info->hit_log[frontID]->first); } return 1; } From 3f57e7d7c62beb25d064959600e25e00fcf5148c Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sat, 29 Mar 2025 20:57:25 +0800 Subject: [PATCH 14/16] =?UTF-8?q?=E8=A7=84=E8=8C=83=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 323 
+++++++++++++++++++------------------------------ 1 file changed, 125 insertions(+), 198 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index a993bf548..11f03d194 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -1,57 +1,3 @@ -/* - * Copyright (c) 2015-2021, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * Hyperscan example program 1: simplegrep - * - * This is a simple example of Hyperscan's most basic functionality: it will - * search a given input file for a pattern supplied as a command-line argument. - * It is intended to demonstrate correct usage of the hs_compile and hs_scan - * functions of Hyperscan. - * - * Patterns are scanned in 'DOTALL' mode, which is equivalent to PCRE's '/s' - * modifier. This behaviour can be changed by modifying the "flags" argument to - * hs_compile. - * - * Build instructions: - * - * gcc -o simplegrep simplegrep.c $(pkg-config --cflags --libs libhs) - * - * Usage: - * - * ./simplegrep - * - * Example: - * - * ./simplegrep int simplegrep.c - * - */ - #include #include #include @@ -62,181 +8,162 @@ #include #define PATTERN_COUNT 6 - const char *patterns[PATTERN_COUNT] = {"aaa", "bbb", "11 & 12","ccc"," 12 & 14 & 11","ddd"}; - -/** - * This is the function that will be called for each match that occurs. @a ctx - * is to allow you to have some application-specific state that you will get - * access to for each match. In our simple example we're just going to use it - * to pass in the pattern that was being searched for so we can print it out. - */ static int eventHandler(unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *ctx) { - printf("Match for pattern id = %d,expr =%s at offset %llu\n", id,patterns[id-11], to); + printf("Match for pattern id = %d, at offset %llu\n", id, + to); return 0; } - unsigned int ids[PATTERN_COUNT] = {11, 12, 13,14,15,16}; - unsigned int flags[PATTERN_COUNT] = {HS_FLAG_SINGLEMATCH,0,HS_FLAG_COMBINATION,0,HS_FLAG_COMBINATION,0}; - -/** - * Fill a data buffer from the given filename, returning it and filling @a - * length with its length. Returns NULL on failure. 
- */ -static char *readInputData(const char *inputFN, unsigned int *length) { - FILE *f = fopen(inputFN, "rb"); - if (!f) { - fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN, - strerror(errno)); - return NULL; - } +void test_depth(void); +void test_offset(void); +void test_logical_combiantaion(void); +void test_logical_combination_relative_position(void); +void test_depth(void) { + const char *corpus = "aaaxxaaaxxxxxxxxaaa"; + const char *patterns[1] = {"aaa"}; + unsigned int ids[1] = {1}; + unsigned int flags[1] = {0}; + hs_expr_ext_t e; + e.flags = HS_EXT_FLAG_MAX_DEPTH; + e.max_depth = 10; + const hs_expr_ext_t **exts = + malloc(1 * sizeof(hs_expr_ext_t *)); + exts[0] =&e; - /* We use fseek/ftell to get our data length, in order to keep this example - * code as portable as possible. */ - if (fseek(f, 0, SEEK_END) != 0) { - fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN, - strerror(errno)); - fclose(f); - return NULL; - } - long dataLen = ftell(f); - if (dataLen < 0) { - fprintf(stderr, "ERROR: ftell() failed: %s\n", strerror(errno)); - fclose(f); - return NULL; - } - if (fseek(f, 0, SEEK_SET) != 0) { - fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN, - strerror(errno)); - fclose(f); - return NULL; - } - - /* Hyperscan's hs_scan function accepts length as an unsigned int, so we - * limit the size of our buffer appropriately. 
*/ - if ((unsigned long)dataLen > UINT_MAX) { - dataLen = UINT_MAX; - printf("WARNING: clipping data to %ld bytes\n", dataLen); - } else if (dataLen == 0) { - fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN); - fclose(f); - return NULL; + hs_database_t *database; + hs_compile_error_t *compile_err; + if (hs_compile_ext_multi(patterns, flags, ids, exts, 1, HS_MODE_BLOCK, NULL, + &database, &compile_err) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to compile pattern \": %s\n", + compile_err->message); + hs_free_compile_error(compile_err); + return ; } - - char *inputData = malloc(dataLen); - if (!inputData) { - fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen); - fclose(f); - return NULL; + hs_scratch_t *scratch = NULL; + if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n"); + hs_free_database(database); + return ; } - - char *p = inputData; - size_t bytesLeft = dataLen; - while (bytesLeft) { - size_t bytesRead = fread(p, 1, bytesLeft, f); - bytesLeft -= bytesRead; - p += bytesRead; - if (ferror(f) != 0) { - fprintf(stderr, "ERROR: fread() failed\n"); - free(inputData); - fclose(f); - return NULL; - } + if (hs_scan(database, corpus, strlen(corpus), 0, scratch, eventHandler, NULL) != + HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to scan input buffer. 
Exiting.\n"); + hs_free_scratch(scratch); } - - fclose(f); - - *length = (unsigned int)dataLen; - return inputData; } - - -int main(int argc, char *argv[]) { - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return -1; +void test_offset(void){ + const char *corpus = "aaaxxaaaxxxxxxxxaaa"; + const char *patterns[1] = {"aaa"}; + unsigned int ids[1] = {1}; + unsigned int flags[1] = {0}; + hs_expr_ext_t e; + e.flags = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET; + e.min_offset = 5; + e.max_offset = 10; + const hs_expr_ext_t **exts = + malloc(1 * sizeof(hs_expr_ext_t *)); + exts[0] =&e; + hs_database_t *database; + hs_compile_error_t *compile_err; + if (hs_compile_ext_multi(patterns, flags, ids, exts, 1, HS_MODE_BLOCK, NULL, + &database, &compile_err) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to compile pattern \": %s\n", + compile_err->message); + hs_free_compile_error(compile_err); + return ; } - - hs_expr_ext_t e1; - e1.flags = HS_EXT_FLAG_MIN_OFFSET|HS_EXT_FLAG_MAX_DEPTH; - e1.min_offset = 0; - e1.max_depth=30; - - hs_expr_ext_t e2; - e2.flags= HS_EXT_FLAG_COMBINATION_PRIORITY; - e2.combinationPriorityCount=2; - e2.combinationPriority = malloc(sizeof(hs_combination_subid_priority_t) * 2); - hs_combination_subid_priority_t p1; - p1.frontID=11; - p1.backID=12; - p1.distance=5; - hs_combination_subid_priority_t p2; - p2.frontID = 12; - p2.backID = 14; - p2.distance = 5; - e2.combinationPriority[0]=&p1; - e2.combinationPriority[1]=&p2; - const hs_expr_ext_t **exts= malloc(PATTERN_COUNT * sizeof(hs_expr_ext_t *)); - exts[0] = &e1; - exts[4] = &e2; - char *inputFN = argv[2]; - - if (access(inputFN, F_OK) != 0) { - fprintf(stderr, "ERROR: file doesn't exist.\n"); - return -1; + hs_scratch_t *scratch = NULL; + if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to allocate scratch space. 
Exiting.\n"); + hs_free_database(database); + return ; } - if (access(inputFN, R_OK) != 0) { - fprintf(stderr, "ERROR: can't be read.\n"); - return -1; + if (hs_scan(database, corpus, strlen(corpus), 0, scratch, eventHandler, NULL) != + HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n"); + hs_free_scratch(scratch); } +} +void test_logical_combiantaion(void){ + const char *corpus = "aaaxxxbbbxxxxxxxxaaa"; + const char *patterns[7] = {"aaa","bbb","ccc","1 & 2","1 | 2","1 & !2","1 & !3"}; + unsigned int ids[7] = {1,2,3,4,5,6,7}; + unsigned int flags[7] = {HS_FLAG_QUIET,HS_FLAG_QUIET,HS_FLAG_QUIET,HS_FLAG_COMBINATION,HS_FLAG_COMBINATION,HS_FLAG_COMBINATION,HS_FLAG_COMBINATION}; hs_database_t *database; hs_compile_error_t *compile_err; - if (hs_compile_ext_multi(patterns, flags, ids, exts,PATTERN_COUNT, HS_MODE_BLOCK, - NULL, &database, &compile_err) != HS_SUCCESS) { + if (hs_compile_ext_multi(patterns, flags, ids, NULL, 7, HS_MODE_BLOCK, NULL, + &database, &compile_err) != HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to compile pattern \": %s\n", compile_err->message); hs_free_compile_error(compile_err); - return -1; - } - - /* Next, we read the input data file into a buffer. */ - unsigned int length; - char *inputData = readInputData(inputFN, &length); - if (!inputData) { - hs_free_database(database); - return -1; + return ; } hs_scratch_t *scratch = NULL; if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n"); - free(inputData); hs_free_database(database); - return -1; + return ; } - - printf("Scanning %u bytes with Hyperscan\n", length); - // length =10; - if (hs_scan(database, inputData, length, 0, scratch, eventHandler, NULL) != + if (hs_scan(database, corpus, strlen(corpus), 0, scratch, eventHandler, NULL) != HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to scan input buffer. 
Exiting.\n"); hs_free_scratch(scratch); - free(inputData); + } +} +//逻辑组合相对位置判断 +void test_logical_combination_relative_position(void){ + const char *corpus = "aaaxxxbbbxxxxxxxxbbbxx"; + #define pattern_count 3 + const char *patterns[pattern_count] = {"aaa","bbb","1 & 2"}; + unsigned int ids[pattern_count] = {1,2,3}; + unsigned int flags[pattern_count] = {HS_FLAG_QUIET,HS_FLAG_QUIET,HS_FLAG_COMBINATION}; + hs_expr_ext_t e; + e.flags = HS_EXT_FLAG_COMBINATION_PRIORITY; + e.combinationPriorityCount = 1; + e.combinationPriority = + malloc(sizeof(hs_combination_subid_priority_t) * 1); + hs_combination_subid_priority_t p; + p.frontID = 1; + p.backID = 2; + p.distance = 10; + e.combinationPriority[0] = &p; + + const hs_expr_ext_t **exts = + malloc(pattern_count* sizeof(hs_expr_ext_t *)); + + exts[0]=NULL; + exts[1] =NULL; + exts[2] =&e; + + + hs_database_t *database; + hs_compile_error_t *compile_err; + if (hs_compile_ext_multi(patterns, flags, ids, exts, pattern_count, HS_MODE_BLOCK, NULL, + &database, &compile_err) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to compile pattern \": %s\n", + compile_err->message); + hs_free_compile_error(compile_err); + return ; + } + hs_scratch_t *scratch = NULL; + if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { + fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n"); hs_free_database(database); - return -1; + return ; } - if (hs_scan(database, inputData, length, 0, scratch, eventHandler, NULL) != + if (hs_scan(database, corpus, strlen(corpus), 0, scratch, eventHandler, NULL) != HS_SUCCESS) { fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n"); hs_free_scratch(scratch); - free(inputData); - hs_free_database(database); + } +} +int main(int argc, char *argv[]) { + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); return -1; } - - /* Scanning is complete, any matches have been handled, so now we just - * clean up and exit. 
- */ - hs_free_scratch(scratch); - free(inputData); - hs_free_database(database); - return 0; + test_depth(); + test_offset(); + test_logical_combiantaion(); + test_logical_combination_relative_position(); } From 28477bb64f711b0ce3b60e41c51ad2ea51b9922d Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sat, 29 Mar 2025 21:14:13 +0800 Subject: [PATCH 15/16] =?UTF-8?q?=E5=8E=BB=E9=99=A4argc=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mytest/test1.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mytest/test1.c b/mytest/test1.c index 11f03d194..1b1f00010 100644 --- a/mytest/test1.c +++ b/mytest/test1.c @@ -7,7 +7,6 @@ #include -#define PATTERN_COUNT 6 static int eventHandler(unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *ctx) { printf("Match for pattern id = %d, at offset %llu\n", id, @@ -158,10 +157,6 @@ void test_logical_combination_relative_position(void){ } } int main(int argc, char *argv[]) { - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return -1; - } test_depth(); test_offset(); test_logical_combiantaion(); From 8d6316e1a3daefc7676a810731d2b50c09678836 Mon Sep 17 00:00:00 2001 From: wzh <289410265@qq.com> Date: Sat, 29 Mar 2025 22:01:43 +0800 Subject: [PATCH 16/16] =?UTF-8?q?=E5=88=A0=E9=99=A4mydoc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mydoc/daylog.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 mydoc/daylog.md diff --git a/mydoc/daylog.md b/mydoc/daylog.md deleted file mode 100644 index 8dae0b0e1..000000000 --- a/mydoc/daylog.md +++ /dev/null @@ -1,5 +0,0 @@ -# hyperscan日报 - -## 20240915 - -lldbug这个插件不要用,进不去compile函数!!C++那两个紫色的扩展就行了