From 699a0b6deb76021e31d7fb3a3d57e40bb072679f Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 03:43:59 -0800 Subject: [PATCH 01/12] fix: update entrypoint collector and enhance FastAPI route detection logic --- .../entrypoint-collector/python-entrypoint.ts | 9 + .../fastapi-entrypoint.ts | 322 ++++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts diff --git a/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts b/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts index 909f9bc8..56d4383c 100644 --- a/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts +++ b/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts @@ -1,4 +1,5 @@ const { findFlaskEntryPointAndSource } = require('../../flask/entrypoint-collector/flask-default-entrypoint') +const { findFastApiEntryPointAndSource } = require('../../fastapi/entrypoint-collector/fastapi-entrypoint') const { findInferenceAiStudioTplEntryPointAndSource, findInferenceTritonEntryPointAndSource, @@ -37,6 +38,14 @@ function findPythonFcEntryPointAndSource(dir: string, fileManager: FileManager): pyFcEntryPointSourceArray.push(...flaskEntryPointSourceArray) } + const { fastApiEntryPointArray, fastApiEntryPointSourceArray } = findFastApiEntryPointAndSource(filenameAstObj, dir) + if (fastApiEntryPointArray) { + pyFcEntryPointArray.push(...fastApiEntryPointArray) + } + if (fastApiEntryPointSourceArray) { + pyFcEntryPointSourceArray.push(...fastApiEntryPointSourceArray) + } + const { inferenceAiStudioTplEntryPointArray, inferenceAiStudioTplEntryPointSourceArray } = findInferenceAiStudioTplEntryPointAndSource(filenameAstObj, dir) if (inferenceAiStudioTplEntryPointArray) { diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts new file mode 100644 index 00000000..eacc0fae --- /dev/null +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -0,0 +1,322 @@ +import { extractRelativePath } from '../../../../../util/file-util' +import * as Constant from '../../../../../util/constant' +import { EntryPoint } from '../../../common/entrypoint' + + +const config = require('../../../../../config') +const { entryPointAndSourceAtSameTime } = config + +const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') +const { findSourceOfFuncParam } = PythonEntrypointSource + +const EntryPointClass = require('../../../common/entrypoint') + +interface ASTObject { + body?: any[] + [key: string]: any +} + +interface FilenameAstMap { + [filename: string]: ASTObject +} + +interface ValidInstances { + validFastApiInstances: Set + validRouterInstances: Set +} + +interface EntryPointResult { + fastApiEntryPointArray: EntryPoint[] + fastApiEntryPointSourceArray: any[] +} + +const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) + +/** + * + * @param node + * @returns + */ +function extractLiteralString(node: any): string | null { + if (!node) return null + if (node.type === 'Literal' && typeof node.value === 'string') { + return node.value + } + return null +} + +/** + * + * @param route + * @returns + */ +function extractRouteParams(route: string | null): string[] { + if (!route) return [] + const regex = /\{(.*?)\}/g + const params: string[] = [] + let match: RegExpExecArray | null + while ((match = regex.exec(route)) !== null) { + const name = match[1].split(':').pop() + if (name) params.push(name) + } + return params +} + +/** + * + * @param obj + * @returns + */ +function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { + try { + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + if (obj.left?.type === 'Identifier') { + return { varName: obj.left.name, init: obj.right } + } + } + } catch (error) { + } + return null +} + +/** + * + * @param body + * @returns + */ +function analyzeImports(body: any[]): Map { + const map = new Map() + if (!Array.isArray(body)) return map + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { + const importExpr = obj.init + const localName = obj.id?.name + if (!localName) continue + + const fromValue = extractLiteralString(importExpr.from) + const importedName = importExpr.imported?.name // Identifier + + if (fromValue) { + // from ... import ... + if (fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) { + if (importedName) { + // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' + // (case: fastapi.applications) + map.set(localName, `fastapi.${importedName}`) + } + } + } else if (importedName === 'fastapi') { + // import fastapi + map.set(localName, 'fastapi') + } + } + } + return map +} + +/** + * + * @param node + * @param importMap + * @returns + */ +function resolveCanonicalName(node: any, importMap: Map): string | null { + if (!node) return null + if (node.type === 'Identifier') { + return importMap.get(node.name) || null + } + if (node.type === 'MemberAccess') { + const objectCanonical = resolveCanonicalName(node.object, importMap) + const propertyName = node.property?.name + if (objectCanonical && propertyName) { + return `${objectCanonical}.${propertyName}` + } + } + return null +} + +/** + * + * @param body + * @param importMap + * @returns + */ +function collectValidInstances(body: any[], importMap: Map): ValidInstances { + const validFastApiInstances = new Set() + const validRouterInstances = new Set() + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + // Only process AssignmentExpression + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + const varInfo = extractVarNameAndInit(obj) + if (!varInfo?.varName || !varInfo.init) continue + + if (varInfo.init.type === 'CallExpression') { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap) + if (canonical === 'fastapi.FastAPI') { + validFastApiInstances.add(varInfo.varName) + } else if (canonical === 'fastapi.APIRouter') { + validRouterInstances.add(varInfo.varName) + } + } + } + } + return { validFastApiInstances, validRouterInstances } +} + +/** + * + * @param deco + * @param funcName + * @param obj + * @param relativeFile + * @param filename + * @param validInstances + * @param entryPoints + * @param entryPointSources + */ +function processDecorator( + deco: any, + funcName: string, + obj: any, + relativeFile: string, + filename: string, + validInstances: ValidInstances, + entryPoints: EntryPoint[], + entryPointSources: any[] +): void { + if (!deco || deco.type !== 'CallExpression') return + const { callee } = deco + + if (!callee || callee.type !== 'MemberAccess') return + + const methodName = callee.property?.name + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return + + // Get router or app name + let routerName = '' + if (callee.object?.type === 'Identifier') { + routerName = callee.object.name + } + + // Validate router/app + const { validFastApiInstances, validRouterInstances } = validInstances + const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) + + if (!isValidRouter) return + + // Create entrypoint + const routePath = extractLiteralString(deco.arguments?.[0]) + const params = extractRouteParams(routePath) + + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) + entryPoint.filePath = relativeFile + entryPoint.functionName = funcName + entryPoint.attribute = 'HTTP' + + entryPoints.push(entryPoint) + + if (entryPointAndSourceAtSameTime) { + const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) + + if (filename !== relativeFile) { + const extra = findSourceOfFuncParam(filename, funcName, obj, undefined) + if (extra?.length) entryPointSources.push(...extra) + } + + if (paramSources) { + entryPointSources.push(...paramSources) + const allScopeSources = paramSources.map((s: any) => ({ ...s, scopeFile: 'all' })) + entryPointSources.push(...allScopeSources) + } + + if (params.length && Array.isArray(obj.parameters)) { + for (const p of obj.parameters) { + const pn = p.id?.name + if (pn && params.includes(pn)) { + entryPointSources.push({ + introPoint: 4, + kind: 'PYTHON_INPUT', + path: pn, + scopeFile: 'all', + scopeFunc: funcName, + locStart: p.loc?.start?.line, + locEnd: p.loc?.end?.line, + locColumnStart: p.loc?.start?.column, + locColumnEnd: p.loc?.end?.column, + }) + } + } + } + } +} + +/** + * + * @param filenameAstObj + * @param dir + * @returns + */ +function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { + const entryPoints: EntryPoint[] = [] + const entryPointSources: any[] = [] + + for (const filename in filenameAstObj) { + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue + const fileObj = filenameAstObj[filename] + if (!fileObj?.body) continue + + // Calculate relative path + const { body } = fileObj + const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename + + if (!relativeFile) continue + + const importMap = analyzeImports(body) + + let hasFastApiImport = false + for (const val of importMap.values()) { + if (val === 'fastapi' || val.startsWith('fastapi.')) { + hasFastApiImport = true + break + } + } + if (!hasFastApiImport) continue + + const validInstances = collectValidInstances(body, importMap) + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { + const funcName = obj.id.name + const { decorators } = obj._meta + + for (const deco of decorators) { + processDecorator( + deco, + funcName, + obj, + relativeFile, + filename, + validInstances, + entryPoints, + entryPointSources + ) + } + } + } + } + + return { + fastApiEntryPointArray: entryPoints, + fastApiEntryPointSourceArray: entryPointSources, + } +} + +export = { findFastApiEntryPointAndSource } From 86f34736303a6717a7bb07a30884d54d987b58f3 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 05:13:47 -0800 Subject: [PATCH 02/12] fix: update fastapi entrypoint --- .../fastapi-entrypoint.ts | 323 ++++++++++-------- 1 file changed, 190 insertions(+), 133 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index eacc0fae..8b395de6 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -1,36 +1,44 @@ -import { extractRelativePath } from '../../../../../util/file-util' -import * as Constant from '../../../../../util/constant' -import { EntryPoint } from '../../../common/entrypoint' +import { extractRelativePath } from "../../../../../util/file-util"; +import * as Constant from "../../../../../util/constant"; +import { EntryPoint } from "../../../common/entrypoint"; +const config = require("../../../../../config"); +const { entryPointAndSourceAtSameTime } = config; -const config = require('../../../../../config') -const { entryPointAndSourceAtSameTime } = config +const PythonEntrypointSource = require("../../common/entrypoint-collector/python-entrypoint-source"); +const { findSourceOfFuncParam } = PythonEntrypointSource; -const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') -const { findSourceOfFuncParam } = PythonEntrypointSource - -const EntryPointClass = require('../../../common/entrypoint') +const EntryPointClass = require("../../../common/entrypoint"); interface ASTObject { - body?: any[] - [key: string]: any + body?: any[]; + [key: string]: any; } interface FilenameAstMap { - [filename: string]: ASTObject + [filename: string]: ASTObject; } interface ValidInstances { - validFastApiInstances: Set - validRouterInstances: Set + validFastApiInstances: Set; + validRouterInstances: Set; } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[] - fastApiEntryPointSourceArray: any[] + fastApiEntryPointArray: EntryPoint[]; + fastApiEntryPointSourceArray: any[]; } -const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) +const ROUTE_DECORATORS = new Set([ + "get", + "post", + "put", + "delete", + "patch", + "options", + "head", + "route", +]); /** * @@ -38,11 +46,11 @@ const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'opti * @returns */ function extractLiteralString(node: any): string | null { - if (!node) return null - if (node.type === 'Literal' && typeof node.value === 'string') { - return node.value + if (!node) return null; + if (node.type === "Literal" && typeof node.value === "string") { + return node.value; } - return null + return null; } /** @@ -51,15 +59,15 @@ function extractLiteralString(node: any): string | null { * @returns */ function extractRouteParams(route: string | null): string[] { - if (!route) return [] - const regex = /\{(.*?)\}/g - const params: string[] = [] - let match: RegExpExecArray | null + if (!route) return []; + const regex = /\{(.*?)\}/g; + const params: string[] = []; + let match: RegExpExecArray | null; while ((match = regex.exec(route)) !== null) { - const name = match[1].split(':').pop() - if (name) params.push(name) + const name = match[1].split(":").pop(); + if (name) params.push(name); } - return params + return params; } /** @@ -67,16 +75,17 @@ function extractRouteParams(route: string | null): string[] { * @param obj * @returns */ -function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { +function extractVarNameAndInit( + obj: any, +): { varName?: string; init?: any } | null { try { - if (obj.type === 'AssignmentExpression' && obj.operator === '=') { - if (obj.left?.type === 'Identifier') { - return { varName: obj.left.name, init: obj.right } + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + if (obj.left?.type === "Identifier") { + return { varName: obj.left.name, init: obj.right }; } } - } catch (error) { - } - return null + } catch (error) { } + return null; } /** @@ -85,36 +94,51 @@ function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | nul * @returns */ function analyzeImports(body: any[]): Map { - const map = new Map() - if (!Array.isArray(body)) return map + const map = new Map(); + if (!Array.isArray(body)) return map; for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; - if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { - const importExpr = obj.init - const localName = obj.id?.name - if (!localName) continue + if ( + obj.type === "VariableDeclaration" && + obj.init?.type === "ImportExpression" + ) { + const importExpr = obj.init; + const localName = obj.id?.name; + if (!localName) continue; - const fromValue = extractLiteralString(importExpr.from) - const importedName = importExpr.imported?.name // Identifier + const fromValue = extractLiteralString(importExpr.from); + const importedName = importExpr.imported?.name; // Identifier if (fromValue) { // from ... import ... - if (fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) { + if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { if (importedName) { // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`) + map.set(localName, `fastapi.${importedName}`); } } - } else if (importedName === 'fastapi') { - // import fastapi - map.set(localName, 'fastapi') + } else if ( + importedName === "fastapi" || + importedName === "fastapi.applications" || + importedName === "fastapi.routing" || + importedName?.startsWith("fastapi.") + ) { + // import fastapi or import fastapi.applications + if ( + importedName === localName || + importedName.startsWith(`${localName}.`) + ) { + map.set(localName, localName); + } else { + map.set(localName, importedName); + } } } } - return map + return map; } /** @@ -123,19 +147,22 @@ function analyzeImports(body: any[]): Map { * @param importMap * @returns */ -function resolveCanonicalName(node: any, importMap: Map): string | null { - if (!node) return null - if (node.type === 'Identifier') { - return importMap.get(node.name) || null +function resolveCanonicalName( + node: any, + importMap: Map, +): string | null { + if (!node) return null; + if (node.type === "Identifier") { + return importMap.get(node.name) || null; } - if (node.type === 'MemberAccess') { - const objectCanonical = resolveCanonicalName(node.object, importMap) - const propertyName = node.property?.name + if (node.type === "MemberAccess") { + const objectCanonical = resolveCanonicalName(node.object, importMap); + const propertyName = node.property?.name; if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}` + return `${objectCanonical}.${propertyName}`; } } - return null + return null; } /** @@ -144,29 +171,38 @@ function resolveCanonicalName(node: any, importMap: Map): string * @param importMap * @returns */ -function collectValidInstances(body: any[], importMap: Map): ValidInstances { - const validFastApiInstances = new Set() - const validRouterInstances = new Set() +function collectValidInstances( + body: any[], + importMap: Map, +): ValidInstances { + const validFastApiInstances = new Set(); + const validRouterInstances = new Set(); for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; // Only process AssignmentExpression - if (obj.type === 'AssignmentExpression' && obj.operator === '=') { - const varInfo = extractVarNameAndInit(obj) - if (!varInfo?.varName || !varInfo.init) continue - - if (varInfo.init.type === 'CallExpression') { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap) - if (canonical === 'fastapi.FastAPI') { - validFastApiInstances.add(varInfo.varName) - } else if (canonical === 'fastapi.APIRouter') { - validRouterInstances.add(varInfo.varName) + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + const varInfo = extractVarNameAndInit(obj); + if (!varInfo?.varName || !varInfo.init) continue; + + if (varInfo.init.type === "CallExpression") { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap); + if ( + canonical === "fastapi.FastAPI" || + canonical === "fastapi.applications.FastAPI" + ) { + validFastApiInstances.add(varInfo.varName); + } else if ( + canonical === "fastapi.APIRouter" || + canonical === "fastapi.routing.APIRouter" + ) { + validRouterInstances.add(varInfo.varName); } } } } - return { validFastApiInstances, validRouterInstances } + return { validFastApiInstances, validRouterInstances }; } /** @@ -188,68 +224,72 @@ function processDecorator( filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], - entryPointSources: any[] + entryPointSources: any[], ): void { - if (!deco || deco.type !== 'CallExpression') return - const { callee } = deco + if (!deco || deco.type !== "CallExpression") return; + const { callee } = deco; - if (!callee || callee.type !== 'MemberAccess') return + if (!callee || callee.type !== "MemberAccess") return; - const methodName = callee.property?.name - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return + const methodName = callee.property?.name; + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; // Get router or app name - let routerName = '' - if (callee.object?.type === 'Identifier') { - routerName = callee.object.name + let routerName = ""; + if (callee.object?.type === "Identifier") { + routerName = callee.object.name; } // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances - const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) + const { validFastApiInstances, validRouterInstances } = validInstances; + const isValidRouter = + validFastApiInstances.has(routerName) || + validRouterInstances.has(routerName); - if (!isValidRouter) return + if (!isValidRouter) return; // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]) - const params = extractRouteParams(routePath) + const routePath = extractLiteralString(deco.arguments?.[0]); + const params = extractRouteParams(routePath); - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) - entryPoint.filePath = relativeFile - entryPoint.functionName = funcName - entryPoint.attribute = 'HTTP' + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); + entryPoint.filePath = relativeFile; + entryPoint.functionName = funcName; + entryPoint.attribute = "HTTP"; - entryPoints.push(entryPoint) + entryPoints.push(entryPoint); if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) - - if (filename !== relativeFile) { - const extra = findSourceOfFuncParam(filename, funcName, obj, undefined) - if (extra?.length) entryPointSources.push(...extra) - } + const paramSources = findSourceOfFuncParam( + relativeFile, + funcName, + obj, + undefined, + ); if (paramSources) { - entryPointSources.push(...paramSources) - const allScopeSources = paramSources.map((s: any) => ({ ...s, scopeFile: 'all' })) - entryPointSources.push(...allScopeSources) + const allScopeSources = paramSources.map((s: any) => ({ + ...s, + scopeFile: "all", + })); + entryPointSources.push(...allScopeSources); } if (params.length && Array.isArray(obj.parameters)) { for (const p of obj.parameters) { - const pn = p.id?.name + const pn = p.id?.name; if (pn && params.includes(pn)) { entryPointSources.push({ introPoint: 4, - kind: 'PYTHON_INPUT', + kind: "PYTHON_INPUT", path: pn, - scopeFile: 'all', + scopeFile: "all", scopeFunc: funcName, locStart: p.loc?.start?.line, locEnd: p.loc?.end?.line, locColumnStart: p.loc?.start?.column, locColumnEnd: p.loc?.end?.column, - }) + }); } } } @@ -262,40 +302,57 @@ function processDecorator( * @param dir * @returns */ -function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { - const entryPoints: EntryPoint[] = [] - const entryPointSources: any[] = [] +function findFastApiEntryPointAndSource( + filenameAstObj: FilenameAstMap, + dir: string, +): EntryPointResult { + const entryPoints: EntryPoint[] = []; + const entryPointSources: any[] = []; for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue - const fileObj = filenameAstObj[filename] - if (!fileObj?.body) continue + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) + continue; + const fileObj = filenameAstObj[filename]; + if (!fileObj?.body) continue; // Calculate relative path - const { body } = fileObj - const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename - - if (!relativeFile) continue - - const importMap = analyzeImports(body) - - let hasFastApiImport = false + const { body } = fileObj; + const relativeFile = filename.startsWith(dir) + ? extractRelativePath(filename, dir) + : filename; + + if (!relativeFile) continue; + + const importMap = analyzeImports(body); + + const validImports = new Set([ + "fastapi", + "fastapi.FastAPI", + "fastapi.APIRouter", + "fastapi.applications", + "fastapi.routing", + ]); + let hasFastApiImport = false; for (const val of importMap.values()) { - if (val === 'fastapi' || val.startsWith('fastapi.')) { - hasFastApiImport = true - break + if (validImports.has(val)) { + hasFastApiImport = true; + break; } } - if (!hasFastApiImport) continue + if (!hasFastApiImport) continue; - const validInstances = collectValidInstances(body, importMap) + const validInstances = collectValidInstances(body, importMap); for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; - if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { - const funcName = obj.id.name - const { decorators } = obj._meta + if ( + obj.type === "FunctionDefinition" && + obj._meta?.decorators && + obj.id?.name + ) { + const funcName = obj.id.name; + const { decorators } = obj._meta; for (const deco of decorators) { processDecorator( @@ -306,8 +363,8 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str filename, validInstances, entryPoints, - entryPointSources - ) + entryPointSources, + ); } } } @@ -316,7 +373,7 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str return { fastApiEntryPointArray: entryPoints, fastApiEntryPointSourceArray: entryPointSources, - } + }; } -export = { findFastApiEntryPointAndSource } +export = { findFastApiEntryPointAndSource }; From 812b3a0de4634b337848e4c3c77e626588f58bab Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 05:41:13 -0800 Subject: [PATCH 03/12] fix: update fastapi --- .../fastapi-entrypoint.ts | 517 +++++++++--------- 1 file changed, 259 insertions(+), 258 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index 8b395de6..30416a85 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -11,33 +11,34 @@ const { findSourceOfFuncParam } = PythonEntrypointSource; const EntryPointClass = require("../../../common/entrypoint"); interface ASTObject { - body?: any[]; - [key: string]: any; + body?: any[]; + + [key: string]: any; } interface FilenameAstMap { - [filename: string]: ASTObject; + [filename: string]: ASTObject; } interface ValidInstances { - validFastApiInstances: Set; - validRouterInstances: Set; + validFastApiInstances: Set; + validRouterInstances: Set; } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[]; - fastApiEntryPointSourceArray: any[]; + fastApiEntryPointArray: EntryPoint[]; + fastApiEntryPointSourceArray: any[]; } const ROUTE_DECORATORS = new Set([ - "get", - "post", - "put", - "delete", - "patch", - "options", - "head", - "route", + "get", + "post", + "put", + "delete", + "patch", + "options", + "head", + "route", ]); /** @@ -46,11 +47,11 @@ const ROUTE_DECORATORS = new Set([ * @returns */ function extractLiteralString(node: any): string | null { - if (!node) return null; - if (node.type === "Literal" && typeof node.value === "string") { - return node.value; - } - return null; + if (!node) return null; + if (node.type === "Literal" && typeof node.value === "string") { + return node.value; + } + return null; } /** @@ -59,15 +60,15 @@ function extractLiteralString(node: any): string | null { * @returns */ function extractRouteParams(route: string | null): string[] { - if (!route) return []; - const regex = /\{(.*?)\}/g; - const params: string[] = []; - let match: RegExpExecArray | null; - while ((match = regex.exec(route)) !== null) { - const name = match[1].split(":").pop(); - if (name) params.push(name); - } - return params; + if (!route) return []; + const regex = /\{(.*?)\}/g; + const params: string[] = []; + let match: RegExpExecArray | null; + while ((match = regex.exec(route)) !== null) { + const name = match[1].split(":").pop(); + if (name) params.push(name); + } + return params; } /** @@ -76,16 +77,16 @@ function extractRouteParams(route: string | null): string[] { * @returns */ function extractVarNameAndInit( - obj: any, + obj: any, ): { varName?: string; init?: any } | null { - try { - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - if (obj.left?.type === "Identifier") { - return { varName: obj.left.name, init: obj.right }; - } - } - } catch (error) { } - return null; + try { + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + if (obj.left?.type === "Identifier") { + return { varName: obj.left.name, init: obj.right }; + } + } + } catch (error) {} + return null; } /** @@ -94,51 +95,51 @@ function extractVarNameAndInit( * @returns */ function analyzeImports(body: any[]): Map { - const map = new Map(); - if (!Array.isArray(body)) return map; - - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - + const map = new Map(); + if (!Array.isArray(body)) return map; + + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + if ( + obj.type === "VariableDeclaration" && + obj.init?.type === "ImportExpression" + ) { + const importExpr = obj.init; + const localName = obj.id?.name; + if (!localName) continue; + + const fromValue = extractLiteralString(importExpr.from); + const importedName = importExpr.imported?.name; // Identifier + + if (fromValue) { + // from ... import ... + if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { + if (importedName) { + // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' + // (case: fastapi.applications) + map.set(localName, `fastapi.${importedName}`); + } + } + } else if ( + importedName === "fastapi" || + importedName === "fastapi.applications" || + importedName === "fastapi.routing" || + importedName?.startsWith("fastapi.") + ) { + // import fastapi or import fastapi.applications if ( - obj.type === "VariableDeclaration" && - obj.init?.type === "ImportExpression" + importedName === localName || + importedName.startsWith(`${localName}.`) ) { - const importExpr = obj.init; - const localName = obj.id?.name; - if (!localName) continue; - - const fromValue = extractLiteralString(importExpr.from); - const importedName = importExpr.imported?.name; // Identifier - - if (fromValue) { - // from ... import ... - if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { - if (importedName) { - // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' - // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`); - } - } - } else if ( - importedName === "fastapi" || - importedName === "fastapi.applications" || - importedName === "fastapi.routing" || - importedName?.startsWith("fastapi.") - ) { - // import fastapi or import fastapi.applications - if ( - importedName === localName || - importedName.startsWith(`${localName}.`) - ) { - map.set(localName, localName); - } else { - map.set(localName, importedName); - } - } + map.set(localName, localName); + } else { + map.set(localName, importedName); } + } } - return map; + } + return map; } /** @@ -148,21 +149,21 @@ function analyzeImports(body: any[]): Map { * @returns */ function resolveCanonicalName( - node: any, - importMap: Map, + node: any, + importMap: Map, ): string | null { - if (!node) return null; - if (node.type === "Identifier") { - return importMap.get(node.name) || null; + if (!node) return null; + if (node.type === "Identifier") { + return importMap.get(node.name) || null; + } + if (node.type === "MemberAccess") { + const objectCanonical = resolveCanonicalName(node.object, importMap); + const propertyName = node.property?.name; + if (objectCanonical && propertyName) { + return `${objectCanonical}.${propertyName}`; } - if (node.type === "MemberAccess") { - const objectCanonical = resolveCanonicalName(node.object, importMap); - const propertyName = node.property?.name; - if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}`; - } - } - return null; + } + return null; } /** @@ -172,37 +173,37 @@ function resolveCanonicalName( * @returns */ function collectValidInstances( - body: any[], - importMap: Map, + body: any[], + importMap: Map, ): ValidInstances { - const validFastApiInstances = new Set(); - const validRouterInstances = new Set(); + const validFastApiInstances = new Set(); + const validRouterInstances = new Set(); - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - - // Only process AssignmentExpression - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - const varInfo = extractVarNameAndInit(obj); - if (!varInfo?.varName || !varInfo.init) continue; - - if (varInfo.init.type === "CallExpression") { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap); - if ( - canonical === "fastapi.FastAPI" || - canonical === "fastapi.applications.FastAPI" - ) { - validFastApiInstances.add(varInfo.varName); - } else if ( - canonical === "fastapi.APIRouter" || - canonical === "fastapi.routing.APIRouter" - ) { - validRouterInstances.add(varInfo.varName); - } - } + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + // Only process AssignmentExpression + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + const varInfo = extractVarNameAndInit(obj); + if (!varInfo?.varName || !varInfo.init) continue; + + if (varInfo.init.type === "CallExpression") { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap); + if ( + canonical === "fastapi.FastAPI" || + canonical === "fastapi.applications.FastAPI" + ) { + validFastApiInstances.add(varInfo.varName); + } else if ( + canonical === "fastapi.APIRouter" || + canonical === "fastapi.routing.APIRouter" + ) { + validRouterInstances.add(varInfo.varName); } + } } - return { validFastApiInstances, validRouterInstances }; + } + return { validFastApiInstances, validRouterInstances }; } /** @@ -217,83 +218,83 @@ function collectValidInstances( * @param entryPointSources */ function processDecorator( - deco: any, - funcName: string, - obj: any, - relativeFile: string, - filename: string, - validInstances: ValidInstances, - entryPoints: EntryPoint[], - entryPointSources: any[], + deco: any, + funcName: string, + obj: any, + relativeFile: string, + filename: string, + validInstances: ValidInstances, + entryPoints: EntryPoint[], + entryPointSources: any[], ): void { - if (!deco || deco.type !== "CallExpression") return; - const { callee } = deco; - - if (!callee || callee.type !== "MemberAccess") return; - - const methodName = callee.property?.name; - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; - - // Get router or app name - let routerName = ""; - if (callee.object?.type === "Identifier") { - routerName = callee.object.name; + if (!deco || deco.type !== "CallExpression") return; + const { callee } = deco; + + if (!callee || callee.type !== "MemberAccess") return; + + const methodName = callee.property?.name; + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; + + // Get router or app name + let routerName = ""; + if (callee.object?.type === "Identifier") { + routerName = callee.object.name; + } + + // Validate router/app + const { validFastApiInstances, validRouterInstances } = validInstances; + const isValidRouter = + validFastApiInstances.has(routerName) || + validRouterInstances.has(routerName); + + if (!isValidRouter) return; + + // Create entrypoint + const routePath = extractLiteralString(deco.arguments?.[0]); + const params = extractRouteParams(routePath); + + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); + entryPoint.filePath = relativeFile; + entryPoint.functionName = funcName; + entryPoint.attribute = "HTTP"; + + entryPoints.push(entryPoint); + + if (entryPointAndSourceAtSameTime) { + const paramSources = findSourceOfFuncParam( + relativeFile, + funcName, + obj, + undefined, + ); + + if (paramSources) { + const allScopeSources = paramSources.map((s: any) => ({ + ...s, + scopeFile: "all", + })); + entryPointSources.push(...allScopeSources); } - // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances; - const isValidRouter = - validFastApiInstances.has(routerName) || - validRouterInstances.has(routerName); - - if (!isValidRouter) return; - - // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]); - const params = extractRouteParams(routePath); - - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); - entryPoint.filePath = relativeFile; - entryPoint.functionName = funcName; - entryPoint.attribute = "HTTP"; - - entryPoints.push(entryPoint); - - if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam( - relativeFile, - funcName, - obj, - undefined, - ); - - if (paramSources) { - const allScopeSources = paramSources.map((s: any) => ({ - ...s, - scopeFile: "all", - })); - entryPointSources.push(...allScopeSources); - } - - if (params.length && Array.isArray(obj.parameters)) { - for (const p of obj.parameters) { - const pn = p.id?.name; - if (pn && params.includes(pn)) { - entryPointSources.push({ - introPoint: 4, - kind: "PYTHON_INPUT", - path: pn, - scopeFile: "all", - scopeFunc: funcName, - locStart: p.loc?.start?.line, - locEnd: p.loc?.end?.line, - locColumnStart: p.loc?.start?.column, - locColumnEnd: p.loc?.end?.column, - }); - } - } + if (params.length && Array.isArray(obj.parameters)) { + for (const p of obj.parameters) { + const pn = p.id?.name; + if (pn && params.includes(pn)) { + entryPointSources.push({ + introPoint: 4, + kind: "PYTHON_INPUT", + path: pn, + scopeFile: "all", + scopeFunc: funcName, + locStart: p.loc?.start?.line, + locEnd: p.loc?.end?.line, + locColumnStart: p.loc?.start?.column, + locColumnEnd: p.loc?.end?.column, + }); } + } } + } } /** @@ -303,77 +304,77 @@ function processDecorator( * @returns */ function findFastApiEntryPointAndSource( - filenameAstObj: FilenameAstMap, - dir: string, + filenameAstObj: FilenameAstMap, + dir: string, ): EntryPointResult { - const entryPoints: EntryPoint[] = []; - const entryPointSources: any[] = []; - - for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) - continue; - const fileObj = filenameAstObj[filename]; - if (!fileObj?.body) continue; - - // Calculate relative path - const { body } = fileObj; - const relativeFile = filename.startsWith(dir) - ? extractRelativePath(filename, dir) - : filename; - - if (!relativeFile) continue; - - const importMap = analyzeImports(body); - - const validImports = new Set([ - "fastapi", - "fastapi.FastAPI", - "fastapi.APIRouter", - "fastapi.applications", - "fastapi.routing", - ]); - let hasFastApiImport = false; - for (const val of importMap.values()) { - if (validImports.has(val)) { - hasFastApiImport = true; - break; - } - } - if (!hasFastApiImport) continue; - - const validInstances = collectValidInstances(body, importMap); - - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - - if ( - obj.type === "FunctionDefinition" && - obj._meta?.decorators && - obj.id?.name - ) { - const funcName = obj.id.name; - const { decorators } = obj._meta; - - for (const deco of decorators) { - processDecorator( - deco, - funcName, - obj, - relativeFile, - filename, - validInstances, - entryPoints, - entryPointSources, - ); - } - } + const entryPoints: EntryPoint[] = []; + const entryPointSources: any[] = []; + + for (const filename in filenameAstObj) { + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) + continue; + const fileObj = filenameAstObj[filename]; + if (!fileObj?.body) continue; + + // Calculate relative path + const { body } = fileObj; + const relativeFile = filename.startsWith(dir) + ? extractRelativePath(filename, dir) + : filename; + + if (!relativeFile) continue; + + const importMap = analyzeImports(body); + + const validImports = new Set([ + "fastapi", + "fastapi.FastAPI", + "fastapi.APIRouter", + "fastapi.applications", + "fastapi.routing", + ]); + let hasFastApiImport = false; + for (const val of importMap.values()) { + if (validImports.has(val)) { + hasFastApiImport = true; + break; + } + } + if (!hasFastApiImport) continue; + + const validInstances = collectValidInstances(body, importMap); + + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + if ( + obj.type === "FunctionDefinition" && + obj._meta?.decorators && + obj.id?.name + ) { + const funcName = obj.id.name; + const { decorators } = obj._meta; + + for (const deco of decorators) { + processDecorator( + deco, + funcName, + obj, + relativeFile, + filename, + validInstances, + entryPoints, + entryPointSources, + ); } + } } + } - return { - fastApiEntryPointArray: entryPoints, - fastApiEntryPointSourceArray: entryPointSources, - }; + return { + fastApiEntryPointArray: entryPoints, + fastApiEntryPointSourceArray: entryPointSources, + }; } export = { findFastApiEntryPointAndSource }; From 9d11215486b9b572b7a94180262f5053e2ed3d8e Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 19:46:16 -0800 Subject: [PATCH 04/12] fix: resolve FastAPI route parsing issues --- .../fastapi-entrypoint.ts | 417 +++++++----------- 1 file changed, 156 insertions(+), 261 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index 30416a85..de23f3d2 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -1,371 +1,266 @@ -import { extractRelativePath } from "../../../../../util/file-util"; -import * as Constant from "../../../../../util/constant"; -import { EntryPoint } from "../../../common/entrypoint"; +import { extractRelativePath } from '../../../../../util/file-util' +import * as Constant from '../../../../../util/constant' +import type { EntryPoint } from '../../../common/entrypoint' -const config = require("../../../../../config"); -const { entryPointAndSourceAtSameTime } = config; +// eslint-disable-next-line @typescript-eslint/no-var-requires +const config = require('../../../../../config') +// eslint-disable-next-line @typescript-eslint/no-var-requires +const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') +// eslint-disable-next-line @typescript-eslint/no-var-requires +const EntryPointClass = require('../../../common/entrypoint') -const PythonEntrypointSource = require("../../common/entrypoint-collector/python-entrypoint-source"); -const { findSourceOfFuncParam } = PythonEntrypointSource; - -const EntryPointClass = require("../../../common/entrypoint"); +const { entryPointAndSourceAtSameTime } = config +const { findSourceOfFuncParam } = PythonEntrypointSource interface ASTObject { - body?: any[]; - - [key: string]: any; + body?: any[] + [key: string]: any } interface FilenameAstMap { - [filename: string]: ASTObject; + [filename: string]: ASTObject } interface ValidInstances { - validFastApiInstances: Set; - validRouterInstances: Set; + validFastApiInstances: Set + validRouterInstances: Set } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[]; - fastApiEntryPointSourceArray: any[]; + fastApiEntryPointArray: EntryPoint[] + fastApiEntryPointSourceArray: any[] } -const ROUTE_DECORATORS = new Set([ - "get", - "post", - "put", - "delete", - "patch", - "options", - "head", - "route", -]); +const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) /** - * - * @param node - * @returns + * Extracts literal string value. + * @param node AST node + * @returns {string | null} String value or null */ function extractLiteralString(node: any): string | null { - if (!node) return null; - if (node.type === "Literal" && typeof node.value === "string") { - return node.value; + if (!node) return null + if (node.type === 'Literal' && typeof node.value === 'string') { + return node.value } - return null; + return null } /** - * - * @param route - * @returns + * Extracts variable name and init expression. + * @param obj AST node + * @returns {{ varName?: string; init?: any } | null} Variable info or null */ -function extractRouteParams(route: string | null): string[] { - if (!route) return []; - const regex = /\{(.*?)\}/g; - const params: string[] = []; - let match: RegExpExecArray | null; - while ((match = regex.exec(route)) !== null) { - const name = match[1].split(":").pop(); - if (name) params.push(name); +function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { + if (obj.type === 'AssignmentExpression' && obj.operator === '=' && obj.left?.type === 'Identifier') { + return { varName: obj.left.name, init: obj.right } } - return params; + return null } /** - * - * @param obj - * @returns - */ -function extractVarNameAndInit( - obj: any, -): { varName?: string; init?: any } | null { - try { - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - if (obj.left?.type === "Identifier") { - return { varName: obj.left.name, init: obj.right }; - } - } - } catch (error) {} - return null; -} - -/** - * - * @param body - * @returns + * Analyzes imports to build name map. + * @param body AST body + * @returns {Map} Import name map */ +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity function analyzeImports(body: any[]): Map { - const map = new Map(); - if (!Array.isArray(body)) return map; + const map = new Map() + if (!Array.isArray(body)) return map for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue - if ( - obj.type === "VariableDeclaration" && - obj.init?.type === "ImportExpression" - ) { - const importExpr = obj.init; - const localName = obj.id?.name; - if (!localName) continue; + if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { + const importExpr = obj.init + const localName = obj.id?.name + if (!localName) continue - const fromValue = extractLiteralString(importExpr.from); - const importedName = importExpr.imported?.name; // Identifier + const fromValue = extractLiteralString(importExpr.from) + const importedName = importExpr.imported?.name // Identifier if (fromValue) { // from ... import ... - if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { - if (importedName) { - // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' - // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`); - } - } - } else if ( - importedName === "fastapi" || - importedName === "fastapi.applications" || - importedName === "fastapi.routing" || - importedName?.startsWith("fastapi.") - ) { - // import fastapi or import fastapi.applications - if ( - importedName === localName || - importedName.startsWith(`${localName}.`) - ) { - map.set(localName, localName); - } else { - map.set(localName, importedName); + if ((fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) && importedName) { + // Use full path: fastapi.responses.ORJSONResponse instead of fastapi.ORJSONResponse + const canonicalName = fromValue === 'fastapi' ? `fastapi.${importedName}` : `${fromValue}.${importedName}` + map.set(localName, canonicalName) } + } else if (importedName === 'fastapi') { + // import fastapi + map.set(localName, 'fastapi') } } } - return map; + return map } /** - * - * @param node - * @param importMap - * @returns + * Resolves canonical name from node. + * @param node AST node + * @param importMap Import map + * @returns {string | null} Canonical name or null */ -function resolveCanonicalName( - node: any, - importMap: Map, -): string | null { - if (!node) return null; - if (node.type === "Identifier") { - return importMap.get(node.name) || null; +function resolveCanonicalName(node: any, importMap: Map): string | null { + if (!node) return null + if (node.type === 'Identifier') { + return importMap.get(node.name) || null } - if (node.type === "MemberAccess") { - const objectCanonical = resolveCanonicalName(node.object, importMap); - const propertyName = node.property?.name; + if (node.type === 'MemberAccess') { + const objectCanonical = resolveCanonicalName(node.object, importMap) + const propertyName = node.property?.name if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}`; + return `${objectCanonical}.${propertyName}` } } - return null; + return null } /** - * - * @param body - * @param importMap - * @returns + * Collects valid FastAPI instances. + * @param body AST body + * @param importMap Import map + * @returns {ValidInstances} Valid instances */ -function collectValidInstances( - body: any[], - importMap: Map, -): ValidInstances { - const validFastApiInstances = new Set(); - const validRouterInstances = new Set(); +// eslint-disable-next-line sonarjs/cognitive-complexity +function collectValidInstances(body: any[], importMap: Map): ValidInstances { + const validFastApiInstances = new Set() + const validRouterInstances = new Set() for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue // Only process AssignmentExpression - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - const varInfo = extractVarNameAndInit(obj); - if (!varInfo?.varName || !varInfo.init) continue; - - if (varInfo.init.type === "CallExpression") { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap); - if ( - canonical === "fastapi.FastAPI" || - canonical === "fastapi.applications.FastAPI" - ) { - validFastApiInstances.add(varInfo.varName); - } else if ( - canonical === "fastapi.APIRouter" || - canonical === "fastapi.routing.APIRouter" - ) { - validRouterInstances.add(varInfo.varName); + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + const varInfo = extractVarNameAndInit(obj) + if (!varInfo?.varName || !varInfo.init) continue + + if (varInfo.init.type === 'CallExpression') { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap) + if (canonical && canonical.startsWith('fastapi')) { + if (canonical.endsWith('.FastAPI')) { + validFastApiInstances.add(varInfo.varName) + } else if (canonical.endsWith('.APIRouter')) { + validRouterInstances.add(varInfo.varName) + } } } } } - return { validFastApiInstances, validRouterInstances }; + return { validFastApiInstances, validRouterInstances } } /** - * - * @param deco - * @param funcName - * @param obj - * @param relativeFile - * @param filename - * @param validInstances - * @param entryPoints - * @param entryPointSources + * Processes decorator for entry points. + * @param deco Decorator node + * @param funcName Function name + * @param obj Function node + * @param relativeFile Relative file path + * @param filename Absolute file path + * @param validInstances Valid instances + * @param entryPoints Entry points array + * @param entryPointSources Sources array */ +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity function processDecorator( deco: any, funcName: string, obj: any, relativeFile: string, - filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], - entryPointSources: any[], + entryPointSources: any[] ): void { - if (!deco || deco.type !== "CallExpression") return; - const { callee } = deco; + if (!deco || deco.type !== 'CallExpression') return + const { callee } = deco - if (!callee || callee.type !== "MemberAccess") return; + if (!callee || callee.type !== 'MemberAccess') return - const methodName = callee.property?.name; - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; + const methodName = callee.property?.name + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return // Get router or app name - let routerName = ""; - if (callee.object?.type === "Identifier") { - routerName = callee.object.name; + let routerName = '' + if (callee.object?.type === 'Identifier') { + routerName = callee.object.name } // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances; - const isValidRouter = - validFastApiInstances.has(routerName) || - validRouterInstances.has(routerName); + const { validFastApiInstances, validRouterInstances } = validInstances + const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) - if (!isValidRouter) return; + if (!isValidRouter) return // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]); - const params = extractRouteParams(routePath); + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) + entryPoint.filePath = relativeFile + entryPoint.functionName = funcName + entryPoint.attribute = 'HTTP' - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); - entryPoint.filePath = relativeFile; - entryPoint.functionName = funcName; - entryPoint.attribute = "HTTP"; - - entryPoints.push(entryPoint); + entryPoints.push(entryPoint) if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam( - relativeFile, - funcName, - obj, - undefined, - ); + const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) if (paramSources) { const allScopeSources = paramSources.map((s: any) => ({ ...s, - scopeFile: "all", - })); - entryPointSources.push(...allScopeSources); - } - - if (params.length && Array.isArray(obj.parameters)) { - for (const p of obj.parameters) { - const pn = p.id?.name; - if (pn && params.includes(pn)) { - entryPointSources.push({ - introPoint: 4, - kind: "PYTHON_INPUT", - path: pn, - scopeFile: "all", - scopeFunc: funcName, - locStart: p.loc?.start?.line, - locEnd: p.loc?.end?.line, - locColumnStart: p.loc?.start?.column, - locColumnEnd: p.loc?.end?.column, - }); - } - } + scopeFile: 'all', + })) + entryPointSources.push(...allScopeSources) } } } /** - * - * @param filenameAstObj - * @param dir - * @returns + * Finds FastAPI entry points and sources. + * @param filenameAstObj Filename to AST map + * @param dir Root directory + * @returns {EntryPointResult} Entry points and sources */ -function findFastApiEntryPointAndSource( - filenameAstObj: FilenameAstMap, - dir: string, -): EntryPointResult { - const entryPoints: EntryPoint[] = []; - const entryPointSources: any[] = []; +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity +function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { + const entryPoints: EntryPoint[] = [] + const entryPointSources: any[] = [] for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) - continue; - const fileObj = filenameAstObj[filename]; - if (!fileObj?.body) continue; + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue + const fileObj = filenameAstObj[filename] + if (!fileObj?.body) continue // Calculate relative path - const { body } = fileObj; - const relativeFile = filename.startsWith(dir) - ? extractRelativePath(filename, dir) - : filename; - - if (!relativeFile) continue; - - const importMap = analyzeImports(body); - - const validImports = new Set([ - "fastapi", - "fastapi.FastAPI", - "fastapi.APIRouter", - "fastapi.applications", - "fastapi.routing", - ]); - let hasFastApiImport = false; + const { body } = fileObj + const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename + + if (!relativeFile) continue + + const importMap = analyzeImports(body) + + // Only scan if core components (FastAPI or APIRouter) are imported + // Only scan if core components (FastAPI or APIRouter) are imported + let hasCoreImport = false for (const val of importMap.values()) { - if (validImports.has(val)) { - hasFastApiImport = true; - break; + if ( + val === 'fastapi' || + (val.startsWith('fastapi') && (val.endsWith('.FastAPI') || val.endsWith('.APIRouter'))) + ) { + hasCoreImport = true + break } } - if (!hasFastApiImport) continue; + if (!hasCoreImport) continue - const validInstances = collectValidInstances(body, importMap); + const validInstances = collectValidInstances(body, importMap) for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue - if ( - obj.type === "FunctionDefinition" && - obj._meta?.decorators && - obj.id?.name - ) { - const funcName = obj.id.name; - const { decorators } = obj._meta; + if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { + const funcName = obj.id.name + const { decorators } = obj._meta for (const deco of decorators) { - processDecorator( - deco, - funcName, - obj, - relativeFile, - filename, - validInstances, - entryPoints, - entryPointSources, - ); + processDecorator(deco, funcName, obj, relativeFile, validInstances, entryPoints, entryPointSources) } } } @@ -374,7 +269,7 @@ function findFastApiEntryPointAndSource( return { fastApiEntryPointArray: entryPoints, fastApiEntryPointSourceArray: entryPointSources, - }; + } } -export = { findFastApiEntryPointAndSource }; +export = { findFastApiEntryPointAndSource } From 30fdbdc637e64c3e5510cae533b4ce9b4c48ab50 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 23:23:44 -0800 Subject: [PATCH 05/12] fix:FastAPI entrypoint --- .../entrypoint-collector/fastapi-entrypoint.ts | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index de23f3d2..748c88fd 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -167,6 +167,7 @@ function processDecorator( funcName: string, obj: any, relativeFile: string, + filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], entryPointSources: any[] @@ -200,14 +201,10 @@ function processDecorator( entryPoints.push(entryPoint) if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) + const paramSources = findSourceOfFuncParam(filename, funcName, obj, undefined) if (paramSources) { - const allScopeSources = paramSources.map((s: any) => ({ - ...s, - scopeFile: 'all', - })) - entryPointSources.push(...allScopeSources) + entryPointSources.push(...paramSources) } } } @@ -260,7 +257,7 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str const { decorators } = obj._meta for (const deco of decorators) { - processDecorator(deco, funcName, obj, relativeFile, validInstances, entryPoints, entryPointSources) + processDecorator(deco, funcName, obj, relativeFile, filename, validInstances, entryPoints, entryPointSources) } } } From e61be5fe03d07e226c5241bd314e5ab6ff77be5a Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 8 Dec 2025 00:46:38 -0800 Subject: [PATCH 06/12] feat: add Tornado checker --- resource/checker/checker-config.json | 5 + resource/checker/checker-pack-config.json | 2 + .../taint/python/tornado-taint-checker.ts | 768 ++++++++++++++++++ src/checker/taint/python/tornado-util.ts | 222 +++++ 4 files changed, 997 insertions(+) create mode 100644 src/checker/taint/python/tornado-taint-checker.ts create mode 100644 src/checker/taint/python/tornado-util.ts diff --git a/resource/checker/checker-config.json b/resource/checker/checker-config.json index 5e28f533..fad64657 100644 --- a/resource/checker/checker-config.json +++ b/resource/checker/checker-config.json @@ -135,6 +135,11 @@ "checkerPath": "checker/taint/python/django-taint-checker.ts", "description": "python Django框架 entrypoint采集以及框架source添加" }, + { + "checkerId": "taint_flow_python_tornado_input", + "checkerPath": "checker/taint/python/tornado-taint-checker.ts", + "description": "python Tornado框架 entrypoint采集以及框架source添加" + }, { "checkerId": "taint_flow_test", "checkerPath": "checker/taint/test-taint-checker.ts", diff --git a/resource/checker/checker-pack-config.json b/resource/checker/checker-pack-config.json index 2380e197..580fdd35 100644 --- a/resource/checker/checker-pack-config.json +++ b/resource/checker/checker-pack-config.json @@ -86,6 +86,7 @@ "checkerIds": [ "taint_flow_python_input", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], @@ -96,6 +97,7 @@ "checkerIds": [ "taint_flow_python_input_inner", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts new file mode 100644 index 00000000..30d55ad7 --- /dev/null +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -0,0 +1,768 @@ +import type { FileCache, RoutePair } from './tornado-util' + +const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') +const { extractRelativePath } = require('../../../util/file-util') +const AstUtil = require('../../../util/ast-util') +const Config = require('../../../config') +const completeEntryPoint = require('../common-kit/entry-points-util') +const logger = require('../../../util/logger')(__filename) +const { + isTornadoCall, + parseRoutePair, + resolveImportPath, + extractImportEntries, + extractParamsFromAst, + tornadoSourceAPIs, + passthroughFuncs, +} = require('./tornado-util') + +/** + * Tornado Taint Checker Base Class + */ +class TornadoTaintChecker extends PythonTaintAbstractChecker { + private fileCache = new Map() + + /** + * Helper function to mark a value as tainted + * @param value + */ + private markAsTainted(value: any): void { + if (!value) return + if (!value._tags) { + value._tags = new Set() + } + value._tags.add('PYTHON_INPUT') + value.hasTagRec = true + } + + /** + * + * @param resultManager + */ + constructor(resultManager: any) { + super(resultManager, 'taint_flow_python_tornado_input') + } + + /** + * trigger at start of analyze + * Register sourceScope values as sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtStartOfAnalyze( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) + const BasicRuleHandler = require('../../common/rules-basic-handler') + // 尝试从命令行参数获取 ruleConfigFile + let { ruleConfigFile } = Config + if (!ruleConfigFile || ruleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + ruleConfigFile = args[ruleConfigIndex + 1] + const path = require('path') + ruleConfigFile = path.isAbsolute(ruleConfigFile) + ? ruleConfigFile + : path.resolve(process.cwd(), ruleConfigFile) + } + } + try { + let ruleConfigContent: any[] = [] + if (ruleConfigFile && ruleConfigFile !== '') { + const FileUtil = require('../../../util/file-util') + ruleConfigContent = FileUtil.loadJSONfile(ruleConfigFile) + } else { + ruleConfigContent = BasicRuleHandler.getRules() + } + if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + for (const ruleConfig of ruleConfigContent) { + if ( + ruleConfig.checkerIds && + ((Array.isArray(ruleConfig.checkerIds) && + ruleConfig.checkerIds.length > 0 && + ruleConfig.checkerIds.includes(this.getCheckerId())) || + ruleConfig.checkerIds === this.getCheckerId()) + ) { + const { mergeAToB } = require('../../../util/common-util') + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + } + } + } + } catch (e: any) { + logger.warn(`Error reloading rule config: ${e?.message || e}`) + } + // 注册 sourceScope 中的 source + this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) + // 注册规则配置中的 source + this.addSourceTagForcheckerRuleConfigContent( + 'PYTHON_INPUT', + this.checkerRuleConfigContent, + ) + } + + /** + * Build a light-weight file cache for quick lookup. + * @param analyzer + * @param scope + * @param node + * @param _state + * @param _info + */ + triggerAtCompileUnit( + analyzer: any, + scope: any, + node: any, + _state: any, + _info: any, + ): boolean | undefined { + const fileName = node.loc?.sourcefile + if (!fileName) return + + const cache: FileCache = { + vars: new Map(), + classes: new Map(), + importedSymbols: new Map(), + } + + AstUtil.visit(node, { + AssignmentExpression: (n: any) => { + if (n.left?.type === 'Identifier' && n.left.name) { + cache.vars.set(n.left.name, { value: n.right, file: fileName }) + } + return true + }, + VariableDeclaration: (n: any) => { + const localName = n.id?.name + if (!localName) return true + if (n.init?.type === 'ImportExpression') { + const modulePath = n.init.from?.value || n.init.from?.name + if (!modulePath) return true + const resolved = resolveImportPath(modulePath, fileName) + if (!resolved) return true + const entries = extractImportEntries(n) + for (const entry of entries) { + if (!entry.local) continue + cache.importedSymbols.set(entry.local, { + file: resolved, + originalName: entry.imported, + }) + } + return true + } + if (n.init) { + cache.vars.set(localName, { value: n.init, file: fileName }) + } + return true + }, + ClassDefinition: (n: any) => { + const name = n.name?.name || n.id?.name + if (name) { + cache.classes.set(name, { value: n, file: fileName }) + } + return true + }, + }) + + this.fileCache.set(fileName, cache) + } + + /** + * On function calls, detect tornado Application/add_handlers and collect routes. + * @param analyzer + * @param scope + * @param node + * @param state + * @param _info + */ + triggerAtFuncCallSyntax( + analyzer: any, + scope: any, + node: any, + state: any, + _info: any, + ): boolean | undefined { + const fileName = node.loc?.sourcefile + if (!fileName) return + + // Application(...) -> first arg is routes + if (isTornadoCall(node, 'Application')) { + const routeList = node.arguments?.[0] + if (routeList) { + this.collectTornadoEntrypointAndSource( + analyzer, + scope, + state, + routeList, + fileName, + ) + } + } + + // add_handlers(host, routes) -> second arg is routes + if (isTornadoCall(node, 'add_handlers')) { + const routeList = node.arguments?.[1] + if (routeList) { + this.collectTornadoEntrypointAndSource( + analyzer, + scope, + state, + routeList, + fileName, + ) + } + } + } + + /** + * Override triggerAtIdentifier to mark path parameters as sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtIdentifier( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 先调用基类方法 + super.triggerAtIdentifier(analyzer, scope, node, state, info) + // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 + const { res } = info + if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { + for (const val of this.sourceScope.value) { + if ( + val.path === node.name || + res._sid === val.path || + res._qid === val.path + ) { + // 检查作用域匹配 + const nodeStart = node.loc?.start?.line + const nodeEnd = node.loc?.end?.line + const valStart = val.locStart + const valEnd = val.locEnd + let shouldMark = false + if ( + valStart === 'all' && + valEnd === 'all' && + val.scopeFile === 'all' && + val.scopeFunc === 'all' + ) { + shouldMark = true + } else if ( + valStart === 'all' && + valEnd === 'all' && + val.scopeFile !== 'all' && + val.scopeFunc === 'all' && + typeof node.loc?.sourcefile === 'string' && + node.loc.sourcefile.includes(val.scopeFile) + ) { + shouldMark = true + } else if ( + node.loc?.sourcefile && + val.scopeFile && + node.loc.sourcefile.includes(val.scopeFile) && + typeof nodeStart === 'number' && + typeof valStart === 'number' && + typeof nodeEnd === 'number' && + typeof valEnd === 'number' && + nodeStart >= valStart && + nodeEnd <= valEnd + ) { + shouldMark = true + } + if (shouldMark && (!res._tags || !res._tags.has('PYTHON_INPUT'))) { + if (!res._tags) { + res._tags = new Set() + } + res._tags.add('PYTHON_INPUT') + res.hasTagRec = true + } + } + } + } + } + + /** + * Override checkByNameMatch to support partial matching (e.g., os.system matches syslib_from.os.system) + * @param node + * @param fclos + * @param argvalues + */ + checkByNameMatch(node: any, fclos: any, argvalues: any) { + const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink + if (!rules || rules.length === 0) { + return + } + const callFull = this.getObj(fclos) + if (!callFull) { + super.checkByNameMatch(node, fclos, argvalues) + return + } + // 检查是否有匹配的规则(支持部分匹配) + const matchedRule = rules.find((rule: any) => { + if (typeof rule.fsig !== 'string') return false + return ( + rule.fsig === callFull || + callFull.endsWith(`.${rule.fsig}`) || + callFull.endsWith(rule.fsig) + ) + }) + // 如果有匹配的规则,调用基类方法处理 + if (matchedRule) { + super.checkByNameMatch(node, fclos, argvalues) + } + } + + /** + * Handle API calls like self.get_argument() + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtFunctionCallAfter( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 先调用基类方法处理规则配置中的 source + super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) + + const { fclos, ret } = info + if (!fclos || !ret) { + return + } + + // 从 node.callee 获取方法名(对于 MemberAccess 调用,如 self.get_argument) + let funcName: string | null = null + if (node.callee?.type === 'MemberAccess') { + funcName = node.callee.property?.name + } else if (node.callee?.type === 'Identifier') { + funcName = node.callee.name + } + + // 检查是否是 tornado source API 调用(如 get_argument) + if (funcName && tornadoSourceAPIs.has(funcName)) { + this.markAsTainted(ret) + } + + // 处理 passthrough 函数(如 decode, strip 等) + if (funcName && passthroughFuncs.has(funcName)) { + // 检查是否是 self.request.body.decode 等 source + // 对于 self.request.body.decode('utf-8'),AST 结构: + // node.callee.object.type = 'MemberAccess' (body) + // node.callee.object.object.type = 'MemberAccess' (request) + // node.callee.object.object.object.name = 'self' + if (node.callee?.type === 'MemberAccess' && node.callee.object) { + const bodyNode = node.callee.object + if ( + bodyNode.type === 'MemberAccess' && + bodyNode.property?.name === 'body' && + bodyNode.object?.type === 'MemberAccess' && + bodyNode.object.property?.name === 'request' && + bodyNode.object.object?.name === 'self' + ) { + // 直接标记返回值为 source(因为 self.request.body 是 source) + this.markAsTainted(ret) + return // 已经标记,不需要再检查 receiver + } + } + // 检查 receiver 是否被污染 + const receiver = fclos?.object || fclos?._this + if ( + receiver && + (receiver.taint || + receiver.hasTagRec || + receiver._tags?.has('PYTHON_INPUT')) + ) { + this.markAsTainted(ret) + } + } + } + + /** + * Handle Member Access Sources like self.request.body + * [Fixed]: Now checks AST node structure instead of symbolic result + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtMemberAccess( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + const { res } = info + + if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { + const propName = node.property?.name + const subPropName = node.object?.property?.name + const baseObjName = node.object?.object?.name + + if ( + baseObjName === 'self' && + subPropName === 'request' && + ['body', 'query', 'headers', 'cookies'].includes(propName) + ) { + this.markAsTainted(res) + } + } + } + + /** + * Resolve symbol cross-file + * @param name + * @param currentFile + */ + private resolveSymbol(name: string, currentFile: string): any | null { + if (!name || !currentFile) return null + const cache = this.fileCache.get(currentFile) + if (!cache) return null + const { vars, classes, importedSymbols } = cache + if (vars.has(name)) { + const entry = vars.get(name) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + if (classes.has(name)) { + const entry = classes.get(name) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + + const importInfo = importedSymbols.get(name) + if (!importInfo) return null + const targetCache = this.fileCache.get(importInfo.file) + if (!targetCache) return null + const targetName = importInfo.originalName || name + if (targetCache.vars.has(targetName)) { + const entry = targetCache.vars.get(targetName) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + if (targetCache.classes.has(targetName)) { + const entry = targetCache.classes.get(targetName) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + return null + } + + /** + * Flatten route lists (handles BinaryExpression +) + * @param node + * @param currentFile + */ + private normalizeRoutes(node: any, currentFile: string): RoutePair[] { + if (!node) return [] + + if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { + const elements = node.elements || [] + return elements.flatMap((element: any) => + this.normalizeRoutes(element, currentFile), + ) + } + + if (node.type === 'BinaryExpression') { + return [ + ...this.normalizeRoutes(node.left, currentFile), + ...this.normalizeRoutes(node.right, currentFile), + ] + } + + if (node.type === 'ObjectExpression') { + const values = + node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] + return values.flatMap((value: any) => + this.normalizeRoutes(value, node.loc?.sourcefile || currentFile), + ) + } + + if (node.type === 'Identifier') { + const target = this.resolveSymbol(node.name, currentFile) + if (!target) return [] + const targetFile = target.loc?.sourcefile || currentFile + return this.normalizeRoutes(target, targetFile) + } + + const pair = parseRoutePair(node) + return pair ? [{ ...pair, file: currentFile }] : [] + } + + /** + * Analyze routes and emit entrypoints & sources + * @param analyzer + * @param scope + * @param state + * @param routeList + * @param currentFile + */ + private collectTornadoEntrypointAndSource( + analyzer: any, + scope: any, + state: any, + routeList: any, + currentFile: string, + ) { + const processed = new Set() + const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) + for (const pair of normalizedRoutes) { + if (!pair.path || !pair.handlerName) { + continue + } + const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` + if (processed.has(dedupKey)) { + continue + } + processed.add(dedupKey) + const classAst = this.resolveSymbol( + pair.handlerName, + pair.file || currentFile, + ) + if (!classAst || classAst.type !== 'ClassDefinition') { + continue + } + const classFile = classAst.loc?.sourcefile || pair.file || currentFile + // 使用 analyzer.processInstruction 来处理类对象,确保有正确的结构 + let handlerSymVal: any + try { + handlerSymVal = analyzer.processInstruction(scope, classAst, state) + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + } catch (e) { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + // 确保 handlerSymVal 有 field 结构 + if ( + handlerSymVal && + handlerSymVal.vtype === 'class' && + !handlerSymVal.field + ) { + handlerSymVal.field = {} + } + this.emitHandlerEntrypoints( + analyzer, + handlerSymVal, + pair.path, + classAst, + scope, + state, + ) + } + } + + /** + * Register EntryPoints and Path Param Sources + * [Fixed]: Removed Config check to forcefully register parameters as sources + * @param analyzer + * @param handlerSymVal + * @param urlPattern + * @param classAst + * @param scope + * @param state + */ + private emitHandlerEntrypoints( + analyzer: any, + handlerSymVal: any, + urlPattern: string, + classAst: any, + scope?: any, + state?: any, + ) { + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + return + } + const httpMethods = new Set([ + 'get', + 'post', + 'put', + 'delete', + 'patch', + 'head', + 'options', + ]) + const entrypoints = Object.entries(handlerSymVal.value) + .filter( + ([key, value]: [string, any]) => + httpMethods.has(key) && value.vtype === 'fclos', + ) + .map(([, value]: [string, any]) => value) + + for (const ep of entrypoints as any[]) { + // ignore init files + if (ep.fdef?.loc?.sourcefile?.endsWith('__init__.py')) { + continue + } + + // 尝试使用 analyzer.processInstruction 获取正确的 fclos 对象 + let finalEp = ep + if (scope && state && ep.fdef) { + try { + const processedFclos = analyzer.processInstruction( + scope, + ep.fdef, + state, + ) + if (processedFclos && processedFclos.vtype === 'fclos') { + processedFclos.parent = handlerSymVal + processedFclos.params = ep.params || extractParamsFromAst(ep.fdef) + if (!processedFclos.value) { + processedFclos.value = {} + } + finalEp = processedFclos + } + } catch (e) { + // fallback to original ep + } + } + // 确保 ep 有 value 属性 + if (!finalEp.value) { + finalEp.value = {} + } + + // 确保 finalEp.parent 正确设置,并且 handlerSymVal 有 field 结构 + if (handlerSymVal && handlerSymVal.vtype === 'class') { + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + finalEp.parent = handlerSymVal + } + + try { + // 确保 finalEp 有 completeEntryPoint 需要的属性 + if (!finalEp.ast && finalEp.fdef) { + finalEp.ast = finalEp.fdef + } + if (!finalEp.functionName) { + finalEp.functionName = + finalEp.fdef?.name?.name || + finalEp.fdef?.id?.name || + finalEp.name || + '' + } + // 确保 finalEp 有 filePath + if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { + const FileUtil = require('../../../util/file-util') + const { sourcefile } = finalEp.fdef.loc + if (Config.maindir && typeof Config.maindir === 'string') { + finalEp.filePath = FileUtil.extractRelativePath( + sourcefile, + Config.maindir, + ) + } else { + finalEp.filePath = sourcefile + } + } + const entryPoint = completeEntryPoint(finalEp) + // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 + if ( + entryPoint.entryPointSymVal?.parent && + entryPoint.entryPointSymVal.parent.vtype === 'class' && + !entryPoint.entryPointSymVal.parent.field + ) { + entryPoint.entryPointSymVal.parent.field = {} + } + analyzer.entryPoints.push(entryPoint) + } catch (e: any) { + logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) + continue + } + // 注册参数为 source + const funcName = + finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = + finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + let scopeFile: string | null = null + if (sourceFile) { + if (Config.maindir && typeof Config.maindir === 'string') { + scopeFile = extractRelativePath(sourceFile, Config.maindir) + } else { + scopeFile = sourceFile + } + } + + const paramMetas = + (Array.isArray((finalEp as any).params) && + (finalEp as any).params.length + ? (finalEp as any).params + : extractParamsFromAst(finalEp.fdef)) || [] + if (paramMetas.length > 0) { + for (const meta of paramMetas) { + if (meta.name === 'self') continue + + this.sourceScope.value.push({ + path: meta.name, + kind: 'PYTHON_INPUT', + scopeFile, + scopeFunc: funcName || 'all', + locStart: meta.locStart, + locEnd: meta.locEnd, + }) + } + } + } + } + + /** + * + * @param classNode + */ + private buildClassSymbol(classNode: any): any { + const value: any = {} + const members = classNode.body || [] + members.forEach((member: any) => { + if (member.type !== 'FunctionDefinition') return + const memberName = + member.name?.name || member.name?.id?.name || member.id?.name + if (memberName) { + value[memberName] = { + vtype: 'fclos', + fdef: member, + ast: member, + params: extractParamsFromAst(member), + } + } + }) + return { vtype: 'class', value } + } +} + +export = TornadoTaintChecker diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts new file mode 100644 index 00000000..2ee70cc7 --- /dev/null +++ b/src/checker/taint/python/tornado-util.ts @@ -0,0 +1,222 @@ +const path = require('path') +const AstUtil = require('../../../util/ast-util') + +export interface ImportSymbol { + file: string + originalName?: string +} + +export interface RoutePair { + path: string + handlerName: string + file?: string +} + +export interface FileCache { + vars: Map + classes: Map + importedSymbols: Map +} + +export interface ParamMeta { + name: string + locStart: number | 'all' + locEnd: number | 'all' +} + +export const tornadoSourceAPIs = new Set([ + 'get_argument', + 'get_query_argument', + 'get_body_argument', + 'get_query_arguments', + 'get_body_arguments', + 'get_cookie', + 'get_secure_cookie', +]) + +export const passthroughFuncs = new Set([ + 'decode', + 'strip', + 'replace', + 'lower', + 'upper', + 'split', +]) + +/** + * + * @param node + */ +export function isRequestAttributeAccess(node: any): boolean { + if (node?.type !== 'MemberAccess') return false + const propName = node.property?.name + const inner = node.object + if (inner?.type !== 'MemberAccess') return false + const baseName = inner.object?.name + const requestName = inner.property?.name + return ( + baseName === 'self' && + requestName === 'request' && + ['body', 'query', 'headers', 'cookies'].includes(propName) + ) +} + +/** + * + * @param expr + */ +export function isRequestAttributeExpression(expr: any): boolean { + if (!expr) return false + if (expr.type === 'MemberAccess') return isRequestAttributeAccess(expr) + if (expr.type === 'CallExpression' && expr.callee?.type === 'MemberAccess') { + return isRequestAttributeAccess(expr.callee.object) + } + return false +} + +/** + * + * @param node + * @param targetName + */ +export function isTornadoCall(node: any, targetName: string): boolean { + if (!node || node.type !== 'CallExpression' || !node.callee) return false + const { callee } = node + if (callee.type === 'MemberAccess' && callee.property?.name === targetName) { + return true + } + if (callee.type === 'Identifier' && callee.name === targetName) { + return true + } + return false +} + +/** + * + * @param route + */ +export function parseRoutePair(route: any): RoutePair | null { + if (!route) return null + + const extractLiteral = (expr: any): string | null => { + if (!expr) return null + if (expr.type === 'StringLiteral' || expr.type === 'Literal') { + return typeof expr.value === 'string' ? expr.value : null + } + return null + } + + let pathExpr: any + let handlerNode: any + + if (route.type === 'TupleExpression' && Array.isArray(route.elements)) { + const [first, second] = route.elements + pathExpr = first + handlerNode = second + } else if (route.type === 'CallExpression' && route.callee) { + const { callee } = route + const isUrlHelper = + (callee.type === 'Identifier' && callee.name === 'url') || + (callee.type === 'MemberAccess' && + AstUtil.prettyPrint(callee).includes('url')) + if (isUrlHelper && Array.isArray(route.arguments)) { + const [first, second] = route.arguments + pathExpr = first + handlerNode = second + } + } + if (!pathExpr || !handlerNode || handlerNode.type !== 'Identifier') { + return null + } + const pathValue = extractLiteral(pathExpr) + if (!pathValue) return null + + return { path: pathValue, handlerName: handlerNode.name } +} + +/** + * + * @param modulePath + * @param currentFile + */ +export function resolveImportPath( + modulePath: string, + currentFile: string, +): string | null { + if (!modulePath) return null + const currentDir = path.dirname(currentFile) + const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' + let baseDir = currentDir + if (leadingDots.length > 0) { + baseDir = path.resolve(currentDir, '../'.repeat(leadingDots.length - 1)) + } + const remainder = modulePath.slice(leadingDots.length) + const normalized = remainder ? remainder.split('.').join(path.sep) : '' + const resolved = normalized ? path.resolve(baseDir, normalized) : baseDir + return `${resolved}.py` +} + +/** + * + * @param stmt + */ +export function extractImportEntries( + stmt: any, +): Array<{ local: string; imported?: string }> { + const res: Array<{ local: string; imported?: string }> = [] + const { init } = stmt + if (!init) return res + + if (Array.isArray(init?.imports) && init.imports.length > 0) { + for (const spec of init.imports) { + const local = + spec.local?.name || spec.local?.value || spec.name || spec.value + const imported = + spec.imported?.name || spec.imported?.value || spec.name || spec.value + if (local) res.push({ local, imported }) + } + return res + } + + if (stmt.id?.name) { + const importedName = + init?.imported?.name || + init?.imported?.value || + init?.name?.name || + init?.name?.value + res.push({ local: stmt.id.name, imported: importedName }) + } + return res +} + +/** + * + * @param funcNode + */ +export function extractParamsFromAst(funcNode: any): ParamMeta[] { + if (!funcNode) return [] + const rawParams = Array.isArray(funcNode?.parameters?.parameters) + ? funcNode.parameters.parameters + : Array.isArray(funcNode?.parameters) + ? funcNode.parameters + : [] + const fallbackLine = + typeof funcNode?.loc?.start?.line === 'number' + ? funcNode.loc.start.line + : 'all' + const result: ParamMeta[] = [] + for (const param of rawParams) { + const name = param?.id?.name || param?.name + if (!name) continue + const locStart = + typeof param?.loc?.start?.line === 'number' + ? param.loc.start.line + : fallbackLine + const locEnd = + typeof param?.loc?.end?.line === 'number' + ? param.loc.end.line + : fallbackLine + result.push({ name, locStart, locEnd }) + } + return result +} From f32f9fbe0da122f46534b8e57afc769c11ab4f90 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 8 Dec 2025 01:11:50 -0800 Subject: [PATCH 07/12] feat: Tornado checker --- .../taint/python/tornado-taint-checker.ts | 183 +++--------------- src/checker/taint/python/tornado-util.ts | 54 ++---- 2 files changed, 43 insertions(+), 194 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 30d55ad7..fa905eba 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -52,13 +52,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtStartOfAnalyze( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) const BasicRuleHandler = require('../../common/rules-basic-handler') // 尝试从命令行参数获取 ruleConfigFile @@ -69,9 +63,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { ruleConfigFile = args[ruleConfigIndex + 1] const path = require('path') - ruleConfigFile = path.isAbsolute(ruleConfigFile) - ? ruleConfigFile - : path.resolve(process.cwd(), ruleConfigFile) + ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) } } try { @@ -102,10 +94,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) // 注册规则配置中的 source - this.addSourceTagForcheckerRuleConfigContent( - 'PYTHON_INPUT', - this.checkerRuleConfigContent, - ) + this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } /** @@ -116,13 +105,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param _state * @param _info */ - triggerAtCompileUnit( - analyzer: any, - scope: any, - node: any, - _state: any, - _info: any, - ): boolean | undefined { + triggerAtCompileUnit(analyzer: any, scope: any, node: any, _state: any, _info: any): boolean | undefined { const fileName = node.loc?.sourcefile if (!fileName) return @@ -182,13 +165,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param _info */ - triggerAtFuncCallSyntax( - analyzer: any, - scope: any, - node: any, - state: any, - _info: any, - ): boolean | undefined { + triggerAtFuncCallSyntax(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { const fileName = node.loc?.sourcefile if (!fileName) return @@ -196,13 +173,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isTornadoCall(node, 'Application')) { const routeList = node.arguments?.[0] if (routeList) { - this.collectTornadoEntrypointAndSource( - analyzer, - scope, - state, - routeList, - fileName, - ) + this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) } } @@ -210,13 +181,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isTornadoCall(node, 'add_handlers')) { const routeList = node.arguments?.[1] if (routeList) { - this.collectTornadoEntrypointAndSource( - analyzer, - scope, - state, - routeList, - fileName, - ) + this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) } } } @@ -229,36 +194,21 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtIdentifier( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any): void { // 先调用基类方法 super.triggerAtIdentifier(analyzer, scope, node, state, info) // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 const { res } = info if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { for (const val of this.sourceScope.value) { - if ( - val.path === node.name || - res._sid === val.path || - res._qid === val.path - ) { + if (val.path === node.name || res._sid === val.path || res._qid === val.path) { // 检查作用域匹配 const nodeStart = node.loc?.start?.line const nodeEnd = node.loc?.end?.line const valStart = val.locStart const valEnd = val.locEnd let shouldMark = false - if ( - valStart === 'all' && - valEnd === 'all' && - val.scopeFile === 'all' && - val.scopeFunc === 'all' - ) { + if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') { shouldMark = true } else if ( valStart === 'all' && @@ -313,11 +263,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 检查是否有匹配的规则(支持部分匹配) const matchedRule = rules.find((rule: any) => { if (typeof rule.fsig !== 'string') return false - return ( - rule.fsig === callFull || - callFull.endsWith(`.${rule.fsig}`) || - callFull.endsWith(rule.fsig) - ) + return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) || callFull.endsWith(rule.fsig) }) // 如果有匹配的规则,调用基类方法处理 if (matchedRule) { @@ -333,13 +279,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtFunctionCallAfter( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { // 先调用基类方法处理规则配置中的 source super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) @@ -384,12 +324,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this - if ( - receiver && - (receiver.taint || - receiver.hasTagRec || - receiver._tags?.has('PYTHON_INPUT')) - ) { + if (receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT'))) { this.markAsTainted(ret) } } @@ -404,13 +339,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtMemberAccess( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { const { res } = info if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { @@ -489,24 +418,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { const elements = node.elements || [] - return elements.flatMap((element: any) => - this.normalizeRoutes(element, currentFile), - ) + return elements.flatMap((element: any) => this.normalizeRoutes(element, currentFile)) } if (node.type === 'BinaryExpression') { - return [ - ...this.normalizeRoutes(node.left, currentFile), - ...this.normalizeRoutes(node.right, currentFile), - ] + return [...this.normalizeRoutes(node.left, currentFile), ...this.normalizeRoutes(node.right, currentFile)] } if (node.type === 'ObjectExpression') { - const values = - node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] - return values.flatMap((value: any) => - this.normalizeRoutes(value, node.loc?.sourcefile || currentFile), - ) + const values = node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] + return values.flatMap((value: any) => this.normalizeRoutes(value, node.loc?.sourcefile || currentFile)) } if (node.type === 'Identifier') { @@ -533,7 +454,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { scope: any, state: any, routeList: any, - currentFile: string, + currentFile: string ) { const processed = new Set() const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) @@ -546,10 +467,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { continue } processed.add(dedupKey) - const classAst = this.resolveSymbol( - pair.handlerName, - pair.file || currentFile, - ) + const classAst = this.resolveSymbol(pair.handlerName, pair.file || currentFile) if (!classAst || classAst.type !== 'ClassDefinition') { continue } @@ -571,21 +489,10 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } // 确保 handlerSymVal 有 field 结构 - if ( - handlerSymVal && - handlerSymVal.vtype === 'class' && - !handlerSymVal.field - ) { + if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { handlerSymVal.field = {} } - this.emitHandlerEntrypoints( - analyzer, - handlerSymVal, - pair.path, - classAst, - scope, - state, - ) + this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } } @@ -605,25 +512,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { urlPattern: string, classAst: any, scope?: any, - state?: any, + state?: any ) { if (!handlerSymVal || handlerSymVal.vtype !== 'class') { return } - const httpMethods = new Set([ - 'get', - 'post', - 'put', - 'delete', - 'patch', - 'head', - 'options', - ]) + const httpMethods = new Set(['get', 'post', 'put', 'delete', 'patch', 'head', 'options']) const entrypoints = Object.entries(handlerSymVal.value) - .filter( - ([key, value]: [string, any]) => - httpMethods.has(key) && value.vtype === 'fclos', - ) + .filter(([key, value]: [string, any]) => httpMethods.has(key) && value.vtype === 'fclos') .map(([, value]: [string, any]) => value) for (const ep of entrypoints as any[]) { @@ -636,11 +532,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { let finalEp = ep if (scope && state && ep.fdef) { try { - const processedFclos = analyzer.processInstruction( - scope, - ep.fdef, - state, - ) + const processedFclos = analyzer.processInstruction(scope, ep.fdef, state) if (processedFclos && processedFclos.vtype === 'fclos') { processedFclos.parent = handlerSymVal processedFclos.params = ep.params || extractParamsFromAst(ep.fdef) @@ -672,21 +564,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.ast = finalEp.fdef } if (!finalEp.functionName) { - finalEp.functionName = - finalEp.fdef?.name?.name || - finalEp.fdef?.id?.name || - finalEp.name || - '' + finalEp.functionName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' } // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { const FileUtil = require('../../../util/file-util') const { sourcefile } = finalEp.fdef.loc if (Config.maindir && typeof Config.maindir === 'string') { - finalEp.filePath = FileUtil.extractRelativePath( - sourcefile, - Config.maindir, - ) + finalEp.filePath = FileUtil.extractRelativePath(sourcefile, Config.maindir) } else { finalEp.filePath = sourcefile } @@ -706,10 +591,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { continue } // 注册参数为 source - const funcName = - finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const sourceFile = - finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' let scopeFile: string | null = null if (sourceFile) { if (Config.maindir && typeof Config.maindir === 'string') { @@ -720,8 +603,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } const paramMetas = - (Array.isArray((finalEp as any).params) && - (finalEp as any).params.length + (Array.isArray((finalEp as any).params) && (finalEp as any).params.length ? (finalEp as any).params : extractParamsFromAst(finalEp.fdef)) || [] if (paramMetas.length > 0) { @@ -750,8 +632,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const members = classNode.body || [] members.forEach((member: any) => { if (member.type !== 'FunctionDefinition') return - const memberName = - member.name?.name || member.name?.id?.name || member.id?.name + const memberName = member.name?.name || member.name?.id?.name || member.id?.name if (memberName) { value[memberName] = { vtype: 'fclos', diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 2ee70cc7..d23b434b 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -34,14 +34,7 @@ export const tornadoSourceAPIs = new Set([ 'get_secure_cookie', ]) -export const passthroughFuncs = new Set([ - 'decode', - 'strip', - 'replace', - 'lower', - 'upper', - 'split', -]) +export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) /** * @@ -54,11 +47,7 @@ export function isRequestAttributeAccess(node: any): boolean { if (inner?.type !== 'MemberAccess') return false const baseName = inner.object?.name const requestName = inner.property?.name - return ( - baseName === 'self' && - requestName === 'request' && - ['body', 'query', 'headers', 'cookies'].includes(propName) - ) + return baseName === 'self' && requestName === 'request' && ['body', 'query', 'headers', 'cookies'].includes(propName) } /** @@ -117,8 +106,7 @@ export function parseRoutePair(route: any): RoutePair | null { const { callee } = route const isUrlHelper = (callee.type === 'Identifier' && callee.name === 'url') || - (callee.type === 'MemberAccess' && - AstUtil.prettyPrint(callee).includes('url')) + (callee.type === 'MemberAccess' && AstUtil.prettyPrint(callee).includes('url')) if (isUrlHelper && Array.isArray(route.arguments)) { const [first, second] = route.arguments pathExpr = first @@ -139,10 +127,7 @@ export function parseRoutePair(route: any): RoutePair | null { * @param modulePath * @param currentFile */ -export function resolveImportPath( - modulePath: string, - currentFile: string, -): string | null { +export function resolveImportPath(modulePath: string, currentFile: string): string | null { if (!modulePath) return null const currentDir = path.dirname(currentFile) const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' @@ -160,30 +145,22 @@ export function resolveImportPath( * * @param stmt */ -export function extractImportEntries( - stmt: any, -): Array<{ local: string; imported?: string }> { +export function extractImportEntries(stmt: any): Array<{ local: string; imported?: string }> { const res: Array<{ local: string; imported?: string }> = [] const { init } = stmt if (!init) return res if (Array.isArray(init?.imports) && init.imports.length > 0) { for (const spec of init.imports) { - const local = - spec.local?.name || spec.local?.value || spec.name || spec.value - const imported = - spec.imported?.name || spec.imported?.value || spec.name || spec.value + const local = spec.local?.name || spec.local?.value || spec.name || spec.value + const imported = spec.imported?.name || spec.imported?.value || spec.name || spec.value if (local) res.push({ local, imported }) } return res } if (stmt.id?.name) { - const importedName = - init?.imported?.name || - init?.imported?.value || - init?.name?.name || - init?.name?.value + const importedName = init?.imported?.name || init?.imported?.value || init?.name?.name || init?.name?.value res.push({ local: stmt.id.name, imported: importedName }) } return res @@ -200,22 +177,13 @@ export function extractParamsFromAst(funcNode: any): ParamMeta[] { : Array.isArray(funcNode?.parameters) ? funcNode.parameters : [] - const fallbackLine = - typeof funcNode?.loc?.start?.line === 'number' - ? funcNode.loc.start.line - : 'all' + const fallbackLine = typeof funcNode?.loc?.start?.line === 'number' ? funcNode.loc.start.line : 'all' const result: ParamMeta[] = [] for (const param of rawParams) { const name = param?.id?.name || param?.name if (!name) continue - const locStart = - typeof param?.loc?.start?.line === 'number' - ? param.loc.start.line - : fallbackLine - const locEnd = - typeof param?.loc?.end?.line === 'number' - ? param.loc.end.line - : fallbackLine + const locStart = typeof param?.loc?.start?.line === 'number' ? param.loc.start.line : fallbackLine + const locEnd = typeof param?.loc?.end?.line === 'number' ? param.loc.end.line : fallbackLine result.push({ name, locStart, locEnd }) } return result From 20678809c5581214a25ccf3301a4e6bb15884f06 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Fri, 19 Dec 2025 20:07:31 -0800 Subject: [PATCH 08/12] Fix: update Python analyzer and Tornado taint checker rules --- .../rule_config_python.json | 7 +- src/checker/common/checker.ts | 5 +- src/checker/common/rules-basic-handler.ts | 6 +- src/checker/taint/common-kit/source-util.ts | 7 +- .../python/python-taint-abstract-checker.ts | 29 + .../taint/python/tornado-taint-checker.ts | 758 ++++++++++++------ src/checker/taint/python/tornado-util.ts | 56 +- src/engine/analyzer/common/analyzer.ts | 6 + .../analyzer/python/common/python-analyzer.ts | 54 +- 9 files changed, 679 insertions(+), 249 deletions(-) diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json index 93de362d..1b866057 100644 --- a/resource/example-rule-config/rule_config_python.json +++ b/resource/example-rule-config/rule_config_python.json @@ -1,6 +1,11 @@ [ { - "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"], + "checkerIds": [ + "taint_flow_python_input", + "taint_flow_python_input_inner", + "taint_flow_python_django_input", + "taint_flow_python_tornado_input" + ], "sources": { "FuncCallReturnValueTaintSource": [ { diff --git a/src/checker/common/checker.ts b/src/checker/common/checker.ts index 2cf693fc..db557105 100644 --- a/src/checker/common/checker.ts +++ b/src/checker/common/checker.ts @@ -40,7 +40,10 @@ class CheckerBase { */ loadRuleConfig(checker: any): void { const checkerId = checker.getCheckerId() - const ruleConfigContent = BasicRuleHandler.getRules() + // 路径从 checker/common 回到项目根的 config + const Config = require('../../config') + // 传入 Config.ruleConfigFile,如果为空则让 getRules 从 Config 读取 + const ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { for (const ruleConfig of ruleConfigContent) { if ( diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts index 86d3d69c..865dc894 100644 --- a/src/checker/common/rules-basic-handler.ts +++ b/src/checker/common/rules-basic-handler.ts @@ -18,8 +18,10 @@ let preprocessReady: boolean = false * * @param ruleConfigPath */ -function getRules(ruleConfigPath: string): any[] { - if (!rules) { +function getRules(ruleConfigPath?: string): any[] { + // 如果传入了 ruleConfigPath,或者 config.ruleConfigFile 已设置但 rules 未加载,则重新加载 + const currentRuleConfigFile = ruleConfigPath || config.ruleConfigFile + if (!rules || (currentRuleConfigFile && !rules)) { try { if (ruleConfigPath) { rules = FileUtil.loadJSONfile(ruleConfigPath) diff --git a/src/checker/taint/common-kit/source-util.ts b/src/checker/taint/common-kit/source-util.ts index 1eb2fa15..9c9778c5 100644 --- a/src/checker/taint/common-kit/source-util.ts +++ b/src/checker/taint/common-kit/source-util.ts @@ -171,7 +171,12 @@ function introduceTaintAtIdentifier(node: any, res: any, sourceScopeVal: any): a markTaintSource(res, { path: node, kind: val.kind }) } } - } else if (node.loc.sourcefile.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) { + } else if ( + node.loc.sourcefile && + node.loc.sourcefile.includes(val.scopeFile) && + nodeStart >= valStart && + nodeEnd <= valEnd + ) { markTaintSource(res, { path: node, kind: val.kind }) } } diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts index f4ac5fa1..30e00646 100644 --- a/src/checker/taint/python/python-taint-abstract-checker.ts +++ b/src/checker/taint/python/python-taint-abstract-checker.ts @@ -21,7 +21,36 @@ class PythonTaintAbstractChecker extends TaintChecker { * @param info */ triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) { + // Try normal matching first IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value) + + // If preprocess is not ready, still mark parameters that are in sourceScope + const BasicRuleHandler = require('../../common/rules-basic-handler') + if (!BasicRuleHandler.getPreprocessReady() && this.sourceScope.value && this.sourceScope.value.length > 0) { + for (const source of this.sourceScope.value) { + // Check if kind matches (could be string or array) + const kindMatches = + source.kind === 'PYTHON_INPUT' || (Array.isArray(source.kind) && source.kind.includes('PYTHON_INPUT')) + + if (source.path === node.name && kindMatches) { + // For path parameters, we use 'all' for all scope conditions, so always match + const shouldMatch = + (source.scopeFile === 'all' || !source.scopeFile) && + (source.scopeFunc === 'all' || !source.scopeFunc) && + (source.locStart === 'all' || !source.locStart) && + (source.locEnd === 'all' || !source.locEnd) + + if (shouldMatch && (!info.res._tags || info.res._tags.size === 0)) { + if (!info.res._tags) { + info.res._tags = new Set() + } + info.res._tags.add('PYTHON_INPUT') + info.res.hasTagRec = true + break + } + } + } + } } /** diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index fa905eba..08a57606 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -1,11 +1,14 @@ -import type { FileCache, RoutePair } from './tornado-util' - +const path = require('path') const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') -const { extractRelativePath } = require('../../../util/file-util') +const FileUtil = require('../../../util/file-util') + +const { extractRelativePath } = FileUtil const AstUtil = require('../../../util/ast-util') const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') const logger = require('../../../util/logger')(__filename) +const BasicRuleHandler = require('../../common/rules-basic-handler') +const { mergeAToB } = require('../../../util/common-util') const { isTornadoCall, parseRoutePair, @@ -14,14 +17,33 @@ const { extractParamsFromAst, tornadoSourceAPIs, passthroughFuncs, + isRequestAttributeExpression, + isRequestAttributeAccess, } = require('./tornado-util') +// Type definitions (moved from import to avoid module resolution issues) +interface FileCache { + vars: Map + classes: Map + importedSymbols: Map +} + +interface RoutePair { + path: string + handlerName: string + file?: string +} + /** * Tornado Taint Checker Base Class */ class TornadoTaintChecker extends PythonTaintAbstractChecker { private fileCache = new Map() + private cachedRuleConfigFile: string | null = null + + private cachedRuleConfigContent: any[] | null = null + /** * Helper function to mark a value as tainted * @param value @@ -41,6 +63,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ constructor(resultManager: any) { super(resultManager, 'taint_flow_python_tornado_input') + // 基类构造函数会调用 loadRuleConfig,但此时 Config.ruleConfigFile 可能还没有被设置 + // 所以我们在这里不加载规则配置,而是在 triggerAtStartOfAnalyze 中加载 } /** @@ -53,50 +77,81 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) - const BasicRuleHandler = require('../../common/rules-basic-handler') - // 尝试从命令行参数获取 ruleConfigFile - let { ruleConfigFile } = Config - if (!ruleConfigFile || ruleConfigFile === '') { - const args = process.argv - const ruleConfigIndex = args.indexOf('--ruleConfigFile') - if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { - ruleConfigFile = args[ruleConfigIndex + 1] - const path = require('path') - ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) + const ruleConfigFile: string | null = null + let ruleConfigContent: any[] | null = null + + const currentRuleConfigFile = Config.ruleConfigFile || this.getRuleConfigFileFromArgs() + + if (currentRuleConfigFile && currentRuleConfigFile !== '') { + try { + ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) + this.cachedRuleConfigFile = currentRuleConfigFile + this.cachedRuleConfigContent = ruleConfigContent + } catch (e: any) { + ruleConfigContent = [] + } + } else if (this.cachedRuleConfigContent !== null) { + // 使用缓存的配置内容 + ruleConfigContent = this.cachedRuleConfigContent + } else { + // 尝试从 BasicRuleHandler 获取(可能已经在构造函数中加载) + try { + ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) + if (ruleConfigContent && ruleConfigContent.length > 0) { + this.cachedRuleConfigContent = ruleConfigContent + } + } catch (e: any) { + ruleConfigContent = [] } } - try { - let ruleConfigContent: any[] = [] - if (ruleConfigFile && ruleConfigFile !== '') { - const FileUtil = require('../../../util/file-util') - ruleConfigContent = FileUtil.loadJSONfile(ruleConfigFile) - } else { - ruleConfigContent = BasicRuleHandler.getRules() - } - if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { - for (const ruleConfig of ruleConfigContent) { - if ( - ruleConfig.checkerIds && - ((Array.isArray(ruleConfig.checkerIds) && - ruleConfig.checkerIds.length > 0 && - ruleConfig.checkerIds.includes(this.getCheckerId())) || - ruleConfig.checkerIds === this.getCheckerId()) - ) { - const { mergeAToB } = require('../../../util/common-util') - mergeAToB(ruleConfig, this.checkerRuleConfigContent) + + // 应用规则配置 + const checkerId = this.getCheckerId() + + if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + for (const ruleConfig of ruleConfigContent) { + const checkerIds = Array.isArray(ruleConfig.checkerIds) + ? ruleConfig.checkerIds + : ruleConfig.checkerIds + ? [ruleConfig.checkerIds] + : [] + const matches = checkerIds.length > 0 && checkerIds.includes(checkerId) + + if (matches) { + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + + // 强制确保sinks被正确设置 + if (ruleConfig.sinks?.FuncCallTaintSink) { + this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} + this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink } } } - } catch (e: any) { - logger.warn(`Error reloading rule config: ${e?.message || e}`) } + // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) // 注册规则配置中的 source this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } + /** + * Get ruleConfigFile from command line arguments (cached) + * @returns The resolved ruleConfigFile path or empty string + */ + private getRuleConfigFileFromArgs(): string { + let { ruleConfigFile } = Config + if (!ruleConfigFile || ruleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + ruleConfigFile = args[ruleConfigIndex + 1] + ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) + } + } + return ruleConfigFile || '' + } + /** * Build a light-weight file cache for quick lookup. * @param analyzer @@ -128,7 +183,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (n.init?.type === 'ImportExpression') { const modulePath = n.init.from?.value || n.init.from?.name if (!modulePath) return true - const resolved = resolveImportPath(modulePath, fileName) + const resolved = resolveImportPath(modulePath, fileName, Config.maindir) if (!resolved) return true const entries = extractImportEntries(n) for (const entry of entries) { @@ -158,88 +213,135 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * On function calls, detect tornado Application/add_handlers and collect routes. + * On function call before execution, use argvalues to get resolved symbol values + * This replaces the old AST-based triggerAtFuncCallSyntax approach. + * Using symbol interpretation allows us to: + * 1. Get resolved symbol values for arguments (especially strings) via argvalues + * 2. Handle cases where route lists are obtained through function calls + * 3. Process route objects regardless of how they are obtained (variable, function call, etc.) * @param analyzer * @param scope * @param node * @param state - * @param _info + * @param info */ - triggerAtFuncCallSyntax(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { + triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { + // 先调用基类方法 + super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) + + const { fclos, argvalues } = info + if (!fclos || !argvalues) return + const fileName = node.loc?.sourcefile if (!fileName) return - // Application(...) -> first arg is routes - if (isTornadoCall(node, 'Application')) { - const routeList = node.arguments?.[0] - if (routeList) { - this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) - } + // 检查是否是 Application 或 add_handlers 调用 + let routeListArgValue: any = null + const isApp = isTornadoCall(node, 'Application') + const isAddHandlers = isTornadoCall(node, 'add_handlers') + + if (isApp) { + // Application(...) -> first arg is routes + ;[routeListArgValue] = argvalues + } else if (isAddHandlers) { + // add_handlers(host, routes) -> second arg is routes + ;[, routeListArgValue] = argvalues } - // add_handlers(host, routes) -> second arg is routes - if (isTornadoCall(node, 'add_handlers')) { - const routeList = node.arguments?.[1] - if (routeList) { - this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) - } + if (routeListArgValue) { + this.collectTornadoEntrypointAndSourceFromArgValue(analyzer, scope, state, routeListArgValue, fileName) } } /** - * Override triggerAtIdentifier to mark path parameters as sources + * Collect entrypoints and sources from resolved symbol values (from argvalues) * @param analyzer * @param scope - * @param node * @param state - * @param info + * @param routeListSymVal - Resolved symbol value for route list + * @param currentFile */ - triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 先调用基类方法 - super.triggerAtIdentifier(analyzer, scope, node, state, info) - // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 - const { res } = info - if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { - for (const val of this.sourceScope.value) { - if (val.path === node.name || res._sid === val.path || res._qid === val.path) { - // 检查作用域匹配 - const nodeStart = node.loc?.start?.line - const nodeEnd = node.loc?.end?.line - const valStart = val.locStart - const valEnd = val.locEnd - let shouldMark = false - if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') { - shouldMark = true - } else if ( - valStart === 'all' && - valEnd === 'all' && - val.scopeFile !== 'all' && - val.scopeFunc === 'all' && - typeof node.loc?.sourcefile === 'string' && - node.loc.sourcefile.includes(val.scopeFile) - ) { - shouldMark = true - } else if ( - node.loc?.sourcefile && - val.scopeFile && - node.loc.sourcefile.includes(val.scopeFile) && - typeof nodeStart === 'number' && - typeof valStart === 'number' && - typeof nodeEnd === 'number' && - typeof valEnd === 'number' && - nodeStart >= valStart && - nodeEnd <= valEnd - ) { - shouldMark = true - } - if (shouldMark && (!res._tags || !res._tags.has('PYTHON_INPUT'))) { - if (!res._tags) { - res._tags = new Set() + private collectTornadoEntrypointAndSourceFromArgValue( + analyzer: any, + scope: any, + state: any, + routeListSymVal: any, + currentFile: string + ): void { + if (!routeListSymVal) return + + const processed = new Set() + const routePairs = this.extractRoutesFromSymbolValue(routeListSymVal, currentFile, analyzer, scope, state) + + for (let i = 0; i < routePairs.length; i++) { + const pair = routePairs[i] + if (!pair.path || !pair.handlerName) { + continue + } + const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` + if (processed.has(dedupKey)) { + continue + } + processed.add(dedupKey) + + let handlerSymVal: any = null + let classAst: any = null + + // Helper function to process class AST and get handler symbol value + const processHandlerClass = (ast: any) => { + classAst = ast + try { + handlerSymVal = analyzer.processInstruction(scope, classAst, state) + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} } - res._tags.add('PYTHON_INPUT') - res.hasTagRec = true + } + } catch (e) { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + } + + // First, try to use handler symbol value directly from the route pair + if (pair.handlerSymVal) { + const handlerSym = pair.handlerSymVal + // If it's already a class symbol value, use it directly + if (handlerSym.vtype === 'class') { + handlerSymVal = handlerSym + classAst = handlerSym.ast || handlerSym.fdef + } else if (handlerSym.ast && handlerSym.ast.type === 'ClassDefinition') { + // If we have the AST, process it to get the class symbol value + processHandlerClass(handlerSym.ast) + } else { + // Try to resolve from identifier + const { handlerName } = pair + const handlerFile = pair.file || currentFile + const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) + if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { + processHandlerClass(handlerClassAst) } } + } else { + // Fallback: resolve handler class from name + const { handlerName } = pair + const handlerFile = pair.file || currentFile + const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) + if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { + processHandlerClass(handlerClassAst) + } + } + + // Ensure handlerSymVal has field structure + if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { + handlerSymVal.field = {} + } + + if (handlerSymVal && classAst) { + this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } } } @@ -251,19 +353,70 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param argvalues */ checkByNameMatch(node: any, fclos: any, argvalues: any) { + // 如果sinks配置为空,尝试从规则配置文件加载(延迟加载) + if (!this.checkerRuleConfigContent.sinks?.FuncCallTaintSink) { + const Config = require('../../../config') + const FileUtil = require('../../../util/file-util') + const path = require('path') + const { mergeAToB } = require('../../../util/common-util') + + let currentRuleConfigFile = Config.ruleConfigFile + if (!currentRuleConfigFile || currentRuleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + currentRuleConfigFile = args[ruleConfigIndex + 1] + currentRuleConfigFile = path.isAbsolute(currentRuleConfigFile) + ? currentRuleConfigFile + : path.resolve(process.cwd(), currentRuleConfigFile) + } + } + + if (currentRuleConfigFile && currentRuleConfigFile !== '') { + try { + const ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) + if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + const checkerId = this.getCheckerId() + for (const ruleConfig of ruleConfigContent) { + const checkerIds = Array.isArray(ruleConfig.checkerIds) + ? ruleConfig.checkerIds + : ruleConfig.checkerIds + ? [ruleConfig.checkerIds] + : [] + if (checkerIds.includes(checkerId)) { + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + if (ruleConfig.sinks?.FuncCallTaintSink) { + this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} + this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink + } + } + } + } + } catch (e) { + // 忽略错误 + } + } + } + const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink + const callFull = this.getObj(fclos) + + // 如果还是没有rules,直接调用基类方法(基类可能会从其他地方获取规则) if (!rules || rules.length === 0) { + super.checkByNameMatch(node, fclos, argvalues) return } - const callFull = this.getObj(fclos) + if (!callFull) { super.checkByNameMatch(node, fclos, argvalues) return } // 检查是否有匹配的规则(支持部分匹配) + // 只匹配精确匹配或带点分隔符的部分匹配(如 os.system 匹配 syslib_from.os.system) + // 移除 callFull.endsWith(rule.fsig) 以避免误报(如 "system" 匹配 "test_system") const matchedRule = rules.find((rule: any) => { if (typeof rule.fsig !== 'string') return false - return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) || callFull.endsWith(rule.fsig) + return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) }) // 如果有匹配的规则,调用基类方法处理 if (matchedRule) { @@ -303,24 +456,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 处理 passthrough 函数(如 decode, strip 等) if (funcName && passthroughFuncs.has(funcName)) { - // 检查是否是 self.request.body.decode 等 source - // 对于 self.request.body.decode('utf-8'),AST 结构: - // node.callee.object.type = 'MemberAccess' (body) - // node.callee.object.object.type = 'MemberAccess' (request) - // node.callee.object.object.object.name = 'self' - if (node.callee?.type === 'MemberAccess' && node.callee.object) { - const bodyNode = node.callee.object - if ( - bodyNode.type === 'MemberAccess' && - bodyNode.property?.name === 'body' && - bodyNode.object?.type === 'MemberAccess' && - bodyNode.object.property?.name === 'request' && - bodyNode.object.object?.name === 'self' - ) { - // 直接标记返回值为 source(因为 self.request.body 是 source) - this.markAsTainted(ret) - return // 已经标记,不需要再检查 receiver - } + // 使用 isRequestAttributeExpression 统一检测 request 属性访问(如 self.request.body.decode) + // 这避免了重复的 AST 模式匹配逻辑,保持与 tornado-util.ts 的一致性 + if ( + node.callee?.type === 'MemberAccess' && + node.callee.object && + isRequestAttributeExpression(node.callee.object) + ) { + // 直接标记返回值为 source(因为 self.request.body/query/headers/cookies 等是 source) + this.markAsTainted(ret) + return // 已经标记,不需要再检查 receiver } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this @@ -330,9 +475,53 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } + /** + * Trigger before entrypoint execution + * Mark path parameters as tainted sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtSymbolInterpretOfEntryPointBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { + const entryPointConfig = require('../../../engine/analyzer/common/current-entrypoint') + const entryPoint = entryPointConfig.getCurrentEntryPoint() + if (!entryPoint || !entryPoint.entryPointSymVal) return + + // Check if this entrypoint has path parameters that should be marked as tainted + const params = entryPoint.entryPointSymVal?.ast?.parameters + if (!params) return + + // Get parameter names from sourceScope + const paramNames = new Set() + for (const source of this.sourceScope.value) { + if (source.path && source.kind === 'PYTHON_INPUT') { + paramNames.add(source.path) + } + } + + // Mark matching parameters as tainted by processing them and marking the result + for (const key in params) { + const param = params[key] + const paramName = param?.id?.name || param?.name + if (paramName && paramNames.has(paramName) && paramName !== 'self') { + try { + // Process the parameter to get its symbol value + const paramSymVal = analyzer.processInstruction(entryPoint.entryPointSymVal, param.id || param, state) + if (paramSymVal) { + this.markAsTainted(paramSymVal) + } + } catch (e) { + // Ignore errors + } + } + } + } + /** * Handle Member Access Sources like self.request.body - * [Fixed]: Now checks AST node structure instead of symbolic result + * Reuses isRequestAttributeAccess from tornado-util.ts to maintain consistency * @param analyzer * @param scope * @param node @@ -342,18 +531,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { const { res } = info - if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { - const propName = node.property?.name - const subPropName = node.object?.property?.name - const baseObjName = node.object?.object?.name - - if ( - baseObjName === 'self' && - subPropName === 'request' && - ['body', 'query', 'headers', 'cookies'].includes(propName) - ) { - this.markAsTainted(res) - } + // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 + if (isRequestAttributeAccess(node)) { + this.markAsTainted(res) } } @@ -409,91 +589,210 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * Flatten route lists (handles BinaryExpression +) - * @param node - * @param currentFile + * Extract route pairs from resolved symbol values (from argvalues) + * @param routeListSymVal - Symbol value representing route list + * @param currentFile - Current file path + * @param analyzer + * @param scope + * @param state + * @returns Array of route pairs with handler symbol values */ - private normalizeRoutes(node: any, currentFile: string): RoutePair[] { - if (!node) return [] + private extractRoutesFromSymbolValue( + routeListSymVal: any, + currentFile: string, + analyzer?: any, + scope?: any, + state?: any + ): Array { + if (!routeListSymVal) return [] + + // Handle list/tuple symbol values + if (routeListSymVal.vtype === 'list' || routeListSymVal.vtype === 'tuple' || routeListSymVal.vtype === 'array') { + const elements = routeListSymVal.value || [] + return elements.flatMap((element: any) => + this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) + ) + } + + // Handle object type that might be a list (e.g., when symbol interpretation returns object for list literals) + // Check if it has numeric keys (0, 1, 2, ...) which indicates it's an array-like object + if (routeListSymVal.vtype === 'object' && routeListSymVal.value) { + const keys = Object.keys(routeListSymVal.value).filter((k) => /^\d+$/.test(k)) + if (keys.length > 0) { + // It's an array-like object, extract elements by numeric keys + const elements = keys.map((k) => routeListSymVal.value[k]) + return elements.flatMap((element: any) => + this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) + ) + } + } + + // Handle union types + if (routeListSymVal.vtype === 'union' && Array.isArray(routeListSymVal.value)) { + // Union type might represent a tuple (path, handler) + // Check if it has exactly 2 elements and try to extract as tuple + if (routeListSymVal.value.length === 2) { + const [pathSymVal, handlerSymVal] = routeListSymVal.value + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } + } + // Otherwise, recursively process each element + return routeListSymVal.value.flatMap((val: any) => + this.extractRoutesFromSymbolValue(val, currentFile, analyzer, scope, state) + ) + } - if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { - const elements = node.elements || [] - return elements.flatMap((element: any) => this.normalizeRoutes(element, currentFile)) + // Handle tuple/route pair: (path, handler) + // Check if it's a tuple with 2 elements + if ( + routeListSymVal.vtype === 'tuple' && + Array.isArray(routeListSymVal.value) && + routeListSymVal.value.length >= 2 + ) { + const [pathSymVal, handlerSymVal] = routeListSymVal.value + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } } - if (node.type === 'BinaryExpression') { - return [...this.normalizeRoutes(node.left, currentFile), ...this.normalizeRoutes(node.right, currentFile)] + // Handle object type that represents a tuple (e.g., when tuple is represented as object with 0, 1 keys) + if ( + routeListSymVal.vtype === 'object' && + routeListSymVal.value && + routeListSymVal.value['0'] && + routeListSymVal.value['1'] + ) { + const pathSymVal = routeListSymVal.value['0'] + const handlerSymVal = routeListSymVal.value['1'] + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } } - if (node.type === 'ObjectExpression') { - const values = node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] - return values.flatMap((value: any) => this.normalizeRoutes(value, node.loc?.sourcefile || currentFile)) + // Handle list concatenation via BinaryExpression (e.g., app_routes + [...]) + const astNode = routeListSymVal.ast + if (astNode && astNode.type === 'BinaryExpression' && astNode.operator === '+') { + try { + const pairs: Array = [] + const leftVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.left, state) : null + if (leftVal) { + pairs.push(...this.extractRoutesFromSymbolValue(leftVal, currentFile, analyzer, scope, state)) + } + const rightVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.right, state) : null + if (rightVal) { + pairs.push(...this.extractRoutesFromSymbolValue(rightVal, currentFile, analyzer, scope, state)) + } + if (pairs.length > 0) { + return pairs + } + } catch (e) { + // ignore and fallback to AST parse below + } } - if (node.type === 'Identifier') { - const target = this.resolveSymbol(node.name, currentFile) - if (!target) return [] - const targetFile = target.loc?.sourcefile || currentFile - return this.normalizeRoutes(target, targetFile) + // Fallback: try to parse from AST if available + if (routeListSymVal.ast) { + const pair = parseRoutePair(routeListSymVal.ast) + if (pair) { + const file = routeListSymVal.ast?.loc?.sourcefile || routeListSymVal.loc?.sourcefile || currentFile + return [{ ...pair, file }] + } } - const pair = parseRoutePair(node) - return pair ? [{ ...pair, file: currentFile }] : [] + return [] } /** - * Analyze routes and emit entrypoints & sources - * @param analyzer - * @param scope - * @param state - * @param routeList - * @param currentFile + * Extract string value from symbol value + * @param symVal - Symbol value + * @returns String value or null */ - private collectTornadoEntrypointAndSource( - analyzer: any, - scope: any, - state: any, - routeList: any, - currentFile: string - ) { - const processed = new Set() - const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) - for (const pair of normalizedRoutes) { - if (!pair.path || !pair.handlerName) { - continue + private extractStringFromSymbolValue(symVal: any): string | null { + if (!symVal) return null + + // Direct string value + if (symVal.vtype === 'string' || symVal.vtype === 'literal') { + return typeof symVal.value === 'string' ? symVal.value : null + } + + // From AST + if (symVal.ast && (symVal.ast.type === 'StringLiteral' || symVal.ast.type === 'Literal')) { + return typeof symVal.ast.value === 'string' ? symVal.ast.value : null + } + + return null + } + + /** + * Extract handler name/class from symbol value + * @param handlerSymVal - Handler symbol value + * @returns Handler name or null + */ + private extractHandlerNameFromSymbolValue(handlerSymVal: any): string | null { + if (!handlerSymVal) return null + + // If it's a class symbol value + if (handlerSymVal.vtype === 'class') { + // Try to get class name from AST + if (handlerSymVal.ast?.id?.name) { + return handlerSymVal.ast.id.name } - const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` - if (processed.has(dedupKey)) { - continue + if (handlerSymVal.ast?.name?.name) { + return handlerSymVal.ast.name.name } - processed.add(dedupKey) - const classAst = this.resolveSymbol(pair.handlerName, pair.file || currentFile) - if (!classAst || classAst.type !== 'ClassDefinition') { - continue + // Try from _sid or _qid + if (handlerSymVal._sid) { + return handlerSymVal._sid } - const classFile = classAst.loc?.sourcefile || pair.file || currentFile - // 使用 analyzer.processInstruction 来处理类对象,确保有正确的结构 - let handlerSymVal: any - try { - handlerSymVal = analyzer.processInstruction(scope, classAst, state) - if (!handlerSymVal || handlerSymVal.vtype !== 'class') { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } - } - } catch (e) { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } + if (handlerSymVal._qid) { + const parts = handlerSymVal._qid.split('.') + return parts[parts.length - 1] } - // 确保 handlerSymVal 有 field 结构 - if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { - handlerSymVal.field = {} + } + + // If it's an identifier symbol value + if (handlerSymVal.vtype === 'identifier' || handlerSymVal.vtype === 'var') { + if (handlerSymVal._sid) { + return handlerSymVal._sid + } + if (handlerSymVal.ast?.name) { + return handlerSymVal.ast.name + } + } + + // From AST + if (handlerSymVal.ast) { + if (handlerSymVal.ast.type === 'Identifier') { + return handlerSymVal.ast.name + } + if (handlerSymVal.ast.type === 'ClassDefinition') { + return handlerSymVal.ast.id?.name || handlerSymVal.ast.name?.name || null } - this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } + + return null } /** @@ -568,7 +867,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { - const FileUtil = require('../../../util/file-util') const { sourcefile } = finalEp.fdef.loc if (Config.maindir && typeof Config.maindir === 'string') { finalEp.filePath = FileUtil.extractRelativePath(sourcefile, Config.maindir) @@ -576,6 +874,10 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.filePath = sourcefile } } + // 确保 finalEp 有 ast,completeEntryPoint 可能需要它 + if (!finalEp.ast && finalEp.fdef) { + finalEp.ast = finalEp.fdef + } const entryPoint = completeEntryPoint(finalEp) // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 if ( @@ -586,39 +888,43 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { entryPoint.entryPointSymVal.parent.field = {} } analyzer.entryPoints.push(entryPoint) - } catch (e: any) { - logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) - continue - } - // 注册参数为 source - const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' - let scopeFile: string | null = null - if (sourceFile) { - if (Config.maindir && typeof Config.maindir === 'string') { - scopeFile = extractRelativePath(sourceFile, Config.maindir) - } else { - scopeFile = sourceFile + + // 注册参数为 source + const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + let scopeFile: string | null = null + if (sourceFile) { + if (Config.maindir && typeof Config.maindir === 'string') { + scopeFile = extractRelativePath(sourceFile, Config.maindir) + } else { + scopeFile = sourceFile + } } - } - const paramMetas = - (Array.isArray((finalEp as any).params) && (finalEp as any).params.length - ? (finalEp as any).params - : extractParamsFromAst(finalEp.fdef)) || [] - if (paramMetas.length > 0) { - for (const meta of paramMetas) { - if (meta.name === 'self') continue - - this.sourceScope.value.push({ - path: meta.name, - kind: 'PYTHON_INPUT', - scopeFile, - scopeFunc: funcName || 'all', - locStart: meta.locStart, - locEnd: meta.locEnd, - }) + const paramMetas = + (Array.isArray((finalEp as any).params) && (finalEp as any).params.length + ? (finalEp as any).params + : extractParamsFromAst(finalEp.fdef)) || [] + if (paramMetas.length > 0) { + for (const meta of paramMetas) { + if (meta.name === 'self') continue + // 对于路径参数,使用 'all' 以匹配所有文件和位置,因为参数可能在函数定义的不同位置 + const sourceEntry = { + path: meta.name, + kind: 'PYTHON_INPUT', + scopeFile: 'all', // 使用 'all' 以匹配所有文件 + scopeFunc: 'all', // 使用 'all' 以匹配所有函数,因为 handler 方法可能在嵌套作用域中 + locStart: 'all', // 使用 'all' 以匹配所有行号 + locEnd: 'all', // 使用 'all' 以匹配所有行号 + } + this.sourceScope.value.push(sourceEntry) + // 立即注册 source,因为 triggerAtStartOfAnalyze 可能在 entrypoints 收集之前被调用 + this.addSourceTagForSourceScope('PYTHON_INPUT', [sourceEntry]) + } } + } catch (e: any) { + logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) + continue } } } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index d23b434b..383d28da 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -64,7 +64,7 @@ export function isRequestAttributeExpression(expr: any): boolean { } /** - * + * 用来判断是否是Tornado的请求函数,例如 * @param node * @param targetName */ @@ -104,9 +104,26 @@ export function parseRoutePair(route: any): RoutePair | null { handlerNode = second } else if (route.type === 'CallExpression' && route.callee) { const { callee } = route - const isUrlHelper = - (callee.type === 'Identifier' && callee.name === 'url') || - (callee.type === 'MemberAccess' && AstUtil.prettyPrint(callee).includes('url')) + + /** + * Check if callee is a URL helper function using AST node matching + * Supports: + * - url(...) - simple identifier + * - something.url(...) - member access + * - tornado.web.url(...) - nested member access chain + * This avoids unreliable string-based matching via prettyPrint + */ + const isIdentifierUrlHelper = callee.type === 'Identifier' && callee.name === 'url' + + const isMemberAccessUrlHelper = + callee.type === 'MemberAccess' && + // Check if the final property/member is 'url' + // Supports both 'property' and 'member' fields for different AST representations + ((callee.property && callee.property.type === 'Identifier' && callee.property.name === 'url') || + (callee.member && callee.member.type === 'Identifier' && callee.member.name === 'url')) + + const isUrlHelper = isIdentifierUrlHelper || isMemberAccessUrlHelper + if (isUrlHelper && Array.isArray(route.arguments)) { const [first, second] = route.arguments pathExpr = first @@ -123,21 +140,42 @@ export function parseRoutePair(route: any): RoutePair | null { } /** - * - * @param modulePath - * @param currentFile + * Resolve Python import path to file path + * @param modulePath - The import path (e.g., "handlers.user_handler" or ".handlers.user_handler") + * @param currentFile - The current file path + * @param mainDir - Optional project root directory for absolute imports + * @returns Resolved file path or null */ -export function resolveImportPath(modulePath: string, currentFile: string): string | null { +export function resolveImportPath(modulePath: string, currentFile: string, mainDir?: string): string | null { if (!modulePath) return null + const currentDir = path.dirname(currentFile) const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' - let baseDir = currentDir + let baseDir: string + if (leadingDots.length > 0) { + // Relative import: resolve from current file's directory baseDir = path.resolve(currentDir, '../'.repeat(leadingDots.length - 1)) + } else if (mainDir) { + // Absolute import: resolve from project root (mainDir) + baseDir = mainDir + } else { + // Fallback for absolute imports when mainDir is not provided. + // This is the original behavior and is likely incorrect. + baseDir = currentDir } + const remainder = modulePath.slice(leadingDots.length) const normalized = remainder ? remainder.split('.').join(path.sep) : '' const resolved = normalized ? path.resolve(baseDir, normalized) : baseDir + + // Check if it's a package (directory with __init__.py) + const fs = require('fs') + if (fs.existsSync(resolved) && fs.statSync(resolved).isDirectory()) { + return path.join(resolved, '__init__.py') + } + + // Regular module file return `${resolved}.py` } diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index 924607dd..ce5eb983 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -312,11 +312,17 @@ class Analyzer extends MemSpace { } return this.recordCheckerFindings() } catch (e) { + console.error(`[DEBUG] Error in analyzeProjectAsync: ${e}`) + if (e && typeof e === 'object' && 'stack' in e) { + console.error(`[DEBUG] Error stack: ${(e as any).stack}`) + } handleException( e, 'Error occurred in analyzer analyzeProjectAsync', 'Error occurred in analyzer analyzeProjectAsync' ) + // Still return findings even if there was an error + return this.recordCheckerFindings() } } diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index 427ad6df..aa99fada 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -105,19 +105,53 @@ class PythonAnalyzer extends (Analyzer as any) { return true } const hasAnalysised: any[] = [] - for (const entryPoint of entryPoints) { + for (let i = 0; i < entryPoints.length; i++) { + const entryPoint = entryPoints[i] if (entryPoint.type === constValue.ENGIN_START_FUNCALL) { - if ( - hasAnalysised.includes( - `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` - ) - ) { + // Serialize parameters properly to avoid [object Object] issue + // Use a custom serializer to handle circular references + const params = entryPoint.entryPointSymVal?.ast?.parameters + let paramsStr = '' + if (params) { + try { + // Try to serialize only the essential parts to avoid circular references + if (Array.isArray(params)) { + paramsStr = JSON.stringify( + params.map((p: any) => ({ + id: p?.id?.name || p?.id, + name: p?.name, + })) + ) + } else if (typeof params === 'object') { + // Extract only non-circular fields + const keys = Object.keys(params) + const simpleParams: any = {} + for (const key of keys) { + const val = params[key] + if (val && typeof val === 'object' && val.id) { + simpleParams[key] = { id: val.id?.name || val.id } + } else if (typeof val !== 'object' || val === null) { + simpleParams[key] = val + } + } + paramsStr = JSON.stringify(simpleParams) + } else { + paramsStr = String(params) + } + } catch (e) { + // Fallback: use a simple string representation + paramsStr = params.toString ? params.toString() : String(params) + } + } + // Include parent class name in key to distinguish handlers with same method name + const parentName = entryPoint?.entryPointSymVal?.parent?.id || entryPoint?.entryPointSymVal?.parent?.name || '' + const qid = entryPoint?.entryPointSymVal?._qid || '' + const entryKey = `${entryPoint.filePath}.${entryPoint.functionName}/${parentName}/${qid}#${paramsStr}.${entryPoint.attribute}` + if (hasAnalysised.includes(entryKey)) { continue } - hasAnalysised.push( - `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` - ) + hasAnalysised.push(entryKey) entryPointConfig.setCurrentEntryPoint(entryPoint) logger.info( 'EntryPoint [%s.%s] is executing', @@ -176,11 +210,13 @@ class PythonAnalyzer extends (Analyzer as any) { entryPoint.entryPointSymVal?.parent ) } catch (e) { + console.error(`[DEBUG] Error executing entrypoint [${i}]: ${e}`) handleException( e, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file`, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file` ) + // Continue to next entrypoint instead of breaking } this.checkerManager.checkAtSymbolInterpretOfEntryPointAfter(this, null, null, null, null) } else if (entryPoint.type === constValue.ENGIN_START_FILE_BEGIN) { From b08beadf08557de08b4a7fb8c139e60114bbc51c Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 02:55:36 -0800 Subject: [PATCH 09/12] Fix: update tornado framework --- src/checker/common/rules-basic-handler.ts | 6 ++---- src/checker/taint/python/tornado-taint-checker.ts | 2 ++ src/checker/taint/python/tornado-util.ts | 2 ++ src/engine/analyzer/common/analyzer.ts | 4 ---- src/engine/analyzer/python/common/python-analyzer.ts | 1 - 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts index 865dc894..86d3d69c 100644 --- a/src/checker/common/rules-basic-handler.ts +++ b/src/checker/common/rules-basic-handler.ts @@ -18,10 +18,8 @@ let preprocessReady: boolean = false * * @param ruleConfigPath */ -function getRules(ruleConfigPath?: string): any[] { - // 如果传入了 ruleConfigPath,或者 config.ruleConfigFile 已设置但 rules 未加载,则重新加载 - const currentRuleConfigFile = ruleConfigPath || config.ruleConfigFile - if (!rules || (currentRuleConfigFile && !rules)) { +function getRules(ruleConfigPath: string): any[] { + if (!rules) { try { if (ruleConfigPath) { rules = FileUtil.loadJSONfile(ruleConfigPath) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 08a57606..34548bd0 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -161,6 +161,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param _info */ triggerAtCompileUnit(analyzer: any, scope: any, node: any, _state: any, _info: any): boolean | undefined { + if (Config.entryPointMode === 'ONLY_CUSTOM') return const fileName = node.loc?.sourcefile if (!fileName) return @@ -232,6 +233,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const { fclos, argvalues } = info if (!fclos || !argvalues) return + if (Config.entryPointMode === 'ONLY_CUSTOM') return const fileName = node.loc?.sourcefile if (!fileName) return diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 383d28da..cf219aee 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -32,6 +32,8 @@ export const tornadoSourceAPIs = new Set([ 'get_body_arguments', 'get_cookie', 'get_secure_cookie', + 'get_arguments', + 'get_json_body', ]) export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index ce5eb983..1c923196 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -312,10 +312,6 @@ class Analyzer extends MemSpace { } return this.recordCheckerFindings() } catch (e) { - console.error(`[DEBUG] Error in analyzeProjectAsync: ${e}`) - if (e && typeof e === 'object' && 'stack' in e) { - console.error(`[DEBUG] Error stack: ${(e as any).stack}`) - } handleException( e, 'Error occurred in analyzer analyzeProjectAsync', diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index aa99fada..ede8c2bd 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -210,7 +210,6 @@ class PythonAnalyzer extends (Analyzer as any) { entryPoint.entryPointSymVal?.parent ) } catch (e) { - console.error(`[DEBUG] Error executing entrypoint [${i}]: ${e}`) handleException( e, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file`, From d23f87b4064ad8242b401833d02503301203fdda Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:07:33 -0800 Subject: [PATCH 10/12] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 34548bd0..d13cda6a 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -531,6 +531,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { + if (Config.entryPointMode === 'ONLY_CUSTOM') return const { res } = info // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 From 9f58641bd9e217f5acb6c08084d5cd654c54b084 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:14:09 -0800 Subject: [PATCH 11/12] Fix: update tornado --- src/checker/taint/common-kit/source-util.ts | 7 +------ src/engine/analyzer/common/analyzer.ts | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/checker/taint/common-kit/source-util.ts b/src/checker/taint/common-kit/source-util.ts index 9c9778c5..1eb2fa15 100644 --- a/src/checker/taint/common-kit/source-util.ts +++ b/src/checker/taint/common-kit/source-util.ts @@ -171,12 +171,7 @@ function introduceTaintAtIdentifier(node: any, res: any, sourceScopeVal: any): a markTaintSource(res, { path: node, kind: val.kind }) } } - } else if ( - node.loc.sourcefile && - node.loc.sourcefile.includes(val.scopeFile) && - nodeStart >= valStart && - nodeEnd <= valEnd - ) { + } else if (node.loc.sourcefile.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) { markTaintSource(res, { path: node, kind: val.kind }) } } diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index 1c923196..924607dd 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -317,8 +317,6 @@ class Analyzer extends MemSpace { 'Error occurred in analyzer analyzeProjectAsync', 'Error occurred in analyzer analyzeProjectAsync' ) - // Still return findings even if there was an error - return this.recordCheckerFindings() } } From fbd597844d0cbbfc316f73232144b5b222670169 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:19:07 -0800 Subject: [PATCH 12/12] Fix: update tornado --- src/checker/common/checker.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/checker/common/checker.ts b/src/checker/common/checker.ts index db557105..2cf693fc 100644 --- a/src/checker/common/checker.ts +++ b/src/checker/common/checker.ts @@ -40,10 +40,7 @@ class CheckerBase { */ loadRuleConfig(checker: any): void { const checkerId = checker.getCheckerId() - // 路径从 checker/common 回到项目根的 config - const Config = require('../../config') - // 传入 Config.ruleConfigFile,如果为空则让 getRules 从 Config 读取 - const ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) + const ruleConfigContent = BasicRuleHandler.getRules() if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { for (const ruleConfig of ruleConfigContent) { if (