From 8b5c5dcbe80ba1023074d0cbeaca866d9ab256a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:20:53 +0000 Subject: [PATCH 1/2] Initial plan From 3f6313093a796fe92b186a42521284fa4f6f4fcb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:45:04 +0000 Subject: [PATCH 2/2] Add browser and filesystem utility methods with comprehensive tests Co-authored-by: galaxyeye <1701451+galaxyeye@users.noreply.github.com> --- .../platon/pulsar/common/FileSystemUtils.kt | 365 ++++++++++++++++++ .../pulsar/common/FileSystemUtilsTest.kt | 364 +++++++++++++++++ .../driver/chrome/dom/util/ElementUtils.kt | 262 +++++++++++++ .../driver/chrome/dom/ElementUtilsTest.kt | 321 +++++++++++++++ 4 files changed, 1312 insertions(+) create mode 100644 pulsar-core/pulsar-common/src/main/kotlin/ai/platon/pulsar/common/FileSystemUtils.kt create mode 100644 pulsar-core/pulsar-common/src/test/kotlin/ai/platon/pulsar/common/FileSystemUtilsTest.kt create mode 100644 pulsar-core/pulsar-tools/pulsar-browser/src/main/kotlin/ai/platon/pulsar/browser/driver/chrome/dom/util/ElementUtils.kt create mode 100644 pulsar-core/pulsar-tools/pulsar-browser/src/test/kotlin/ai/platon/pulsar/browser/driver/chrome/dom/ElementUtilsTest.kt diff --git a/pulsar-core/pulsar-common/src/main/kotlin/ai/platon/pulsar/common/FileSystemUtils.kt b/pulsar-core/pulsar-common/src/main/kotlin/ai/platon/pulsar/common/FileSystemUtils.kt new file mode 100644 index 000000000..1bed80b15 --- /dev/null +++ b/pulsar-core/pulsar-common/src/main/kotlin/ai/platon/pulsar/common/FileSystemUtils.kt @@ -0,0 +1,365 @@ +package ai.platon.pulsar.common + +import java.io.IOException +import java.nio.file.* +import java.nio.file.attribute.BasicFileAttributes +import kotlin.io.path.* + +/** + * Enhanced filesystem utility methods for safe file operations. 
/**
 * Filesystem helpers built on `java.nio.file`: recursive delete and copy, lexical
 * path-containment checks, directory statistics, age-based cleanup, atomic writes and
 * file-extension handling.
 */
object FileSystemUtils {

    /**
     * Deletes a directory tree: regular files first, then each directory once it has been walked.
     *
     * A failed delete does not abort the walk; it is reported through the return value instead.
     * The depth guard is evaluated while walking, so entries visited before an over-deep
     * directory is reached have already been removed when the [IOException] is raised.
     *
     * @param path The directory to delete; a path that does not exist counts as already deleted
     * @param maxDepth Maximum directory nesting to descend into, counting [path] itself as level 1
     * @return true if nothing had to be deleted or every delete succeeded, false if any delete failed
     * @throws IllegalArgumentException if [path] exists but is not a directory
     * @throws IOException if the tree is nested deeper than [maxDepth] or the walk itself fails
     */
    @Throws(IOException::class)
    fun deleteDirectoryRecursively(path: Path, maxDepth: Int = 100): Boolean {
        if (!path.exists()) {
            return true
        }
        require(path.isDirectory()) { "Path is not a directory: $path" }

        var deleted = true
        Files.walkFileTree(path, object : SimpleFileVisitor<Path>() {
            private var depth = 0

            override fun preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult {
                if (++depth > maxDepth) {
                    throw IOException("Directory depth exceeds maximum: $maxDepth")
                }
                return FileVisitResult.CONTINUE
            }

            override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult {
                if (!tryDelete(file)) {
                    deleted = false
                }
                return FileVisitResult.CONTINUE
            }

            override fun postVisitDirectory(dir: Path, exc: IOException?): FileVisitResult {
                depth--
                // Fails (and is reported) when an entry inside could not be removed earlier.
                if (!tryDelete(dir)) {
                    deleted = false
                }
                return FileVisitResult.CONTINUE
            }
        })

        return deleted
    }

    /** Deletes a single entry and reports an I/O failure as `false` instead of throwing. */
    private fun tryDelete(entry: Path): Boolean {
        return try {
            Files.delete(entry)
            true
        } catch (e: IOException) {
            false
        }
    }

    /**
     * Copies a directory and everything below it to [target], creating target directories as needed.
     *
     * Without [overwrite], `Files.copy` is called with no options, so it fails on a target file
     * that already exists and the exception propagates.
     *
     * @param source The source directory
     * @param target The target directory
     * @param overwrite Whether to replace files that already exist in the target
     * @return The target path
     * @throws IllegalArgumentException if [source] is missing, is not a directory, or [target]
     *   lies inside [source]
     * @throws IOException if a directory cannot be created or a file cannot be copied
     */
    @Throws(IOException::class)
    fun copyDirectory(source: Path, target: Path, overwrite: Boolean = false): Path {
        require(source.exists()) { "Source directory does not exist: $source" }
        require(source.isDirectory()) { "Source is not a directory: $source" }

        // A target nested in the source would be created inside the tree that is still being
        // walked, so the walk could descend into its own output. Equal paths are let through.
        val absoluteSource = source.toAbsolutePath().normalize()
        val absoluteTarget = target.toAbsolutePath().normalize()
        require(absoluteTarget == absoluteSource || !absoluteTarget.startsWith(absoluteSource)) {
            "Target must not be located inside the source directory: $target"
        }

        Files.walkFileTree(source, object : SimpleFileVisitor<Path>() {
            override fun preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult {
                val targetDir = target.resolve(source.relativize(dir))
                if (!targetDir.exists()) {
                    Files.createDirectories(targetDir)
                }
                return FileVisitResult.CONTINUE
            }

            override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult {
                val targetFile = target.resolve(source.relativize(file))
                if (overwrite) {
                    Files.copy(file, targetFile, StandardCopyOption.REPLACE_EXISTING)
                } else {
                    Files.copy(file, targetFile)
                }
                return FileVisitResult.CONTINUE
            }
        })

        return target
    }

    /**
     * Checks whether [targetPath] lies inside [basePath].
     *
     * The comparison is purely lexical: both paths are made absolute and normalized, and nothing
     * is resolved against the file system, so a symbolic link below the base directory that
     * points elsewhere is still reported as safe.
     *
     * @param basePath The base directory that should contain the path
     * @param targetPath The path to validate
     * @return true if the normalized target starts with the normalized base
     */
    fun isPathSafe(basePath: Path, targetPath: Path): Boolean {
        val normalizedBase = basePath.toAbsolutePath().normalize()
        val normalizedTarget = targetPath.toAbsolutePath().normalize()

        return normalizedTarget.startsWith(normalizedBase)
    }

    /**
     * Resolves [relativePath] against [basePath] and rejects results that leave the base directory.
     *
     * @param basePath The base directory
     * @param relativePath The path to resolve
     * @return The resolved, normalized path
     * @throws IllegalArgumentException if the resolved path fails [isPathSafe]
     */
    @Throws(IllegalArgumentException::class)
    fun resolveSafely(basePath: Path, relativePath: String): Path {
        val resolved = basePath.resolve(relativePath).normalize()
        require(isPathSafe(basePath, resolved)) {
            "Path would escape base directory: $relativePath"
        }
        return resolved
    }

    /**
     * Sums the sizes of all files below a directory.
     *
     * @param path The directory path
     * @return Total size in bytes, or 0 if [path] does not exist or is not a directory
     * @throws IOException if the walk fails
     */
    @Throws(IOException::class)
    fun getDirectorySize(path: Path): Long {
        if (!path.exists() || !path.isDirectory()) {
            return 0L
        }

        var size = 0L
        Files.walkFileTree(path, object : SimpleFileVisitor<Path>() {
            override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult {
                size += attrs.size()
                return FileVisitResult.CONTINUE
            }
        })

        return size
    }

    /**
     * Counts the regular files below a directory that satisfy [predicate].
     *
     * @param path The directory path
     * @param predicate Filter applied to each regular file
     * @param maxDepth Maximum depth to traverse
     * @return Number of matching files, or 0 if [path] does not exist or is not a directory
     * @throws IOException if the walk cannot be started
     */
    @Throws(IOException::class)
    fun countFiles(
        path: Path,
        predicate: (Path) -> Boolean = { true },
        maxDepth: Int = Int.MAX_VALUE
    ): Long {
        if (!path.exists() || !path.isDirectory()) {
            return 0L
        }

        // The stream returned by Files.walk holds open directories until it is closed.
        return Files.walk(path, maxDepth).use { stream ->
            stream.filter { it.isRegularFile() && predicate(it) }.count()
        }
    }

    /**
     * Finds the regular files below a directory whose file name matches a glob pattern.
     *
     * The pattern is matched against the last path element only, not against the full path.
     *
     * @param path The directory path
     * @param pattern Glob pattern, without the `glob:` prefix
     * @param maxDepth Maximum depth to traverse
     * @return Matching files, or an empty list if [path] does not exist or is not a directory
     * @throws IOException if the walk cannot be started
     */
    @Throws(IOException::class)
    fun findFiles(path: Path, pattern: String, maxDepth: Int = Int.MAX_VALUE): List<Path> {
        if (!path.exists() || !path.isDirectory()) {
            return emptyList()
        }

        val matcher = path.fileSystem.getPathMatcher("glob:$pattern")
        val results = mutableListOf<Path>()

        Files.walk(path, maxDepth).use { stream ->
            stream.filter { it.isRegularFile() && matcher.matches(it.fileName) }
                .forEach { results.add(it) }
        }

        return results
    }

    /**
     * Ensures a directory exists, creating it and any missing parents if necessary.
     *
     * @param path The directory path
     * @return The directory path
     * @throws IOException if [path] exists but is not a directory, or creation fails
     */
    @Throws(IOException::class)
    fun ensureDirectory(path: Path): Path {
        if (!path.exists()) {
            Files.createDirectories(path)
        } else if (!path.isDirectory()) {
            throw IOException("Path exists but is not a directory: $path")
        }
        return path
    }

    /**
     * Creates a uniquely named directory in the default temporary-file directory.
     *
     * @param prefix Prefix for the directory name
     * @return The created temporary directory path
     * @throws IOException if the directory cannot be created
     */
    @Throws(IOException::class)
    fun createTempDirectory(prefix: String = "pulsar-"): Path {
        return Files.createTempDirectory(prefix)
    }

    /**
     * Removes regular files below a directory whose last-modified time is older than [maxAgeMillis].
     *
     * Works on a best-effort basis: a file whose timestamp cannot be read or that cannot be
     * deleted is skipped and left out of the result. Directories are never removed.
     *
     * @param path The directory path
     * @param maxAgeMillis Maximum age in milliseconds
     * @param dryRun If true, only report what would be deleted without actually deleting
     * @return Files that were deleted, or in a dry run the files that qualify for deletion
     * @throws IOException if the walk cannot be started
     */
    @Throws(IOException::class)
    fun cleanupOldFiles(path: Path, maxAgeMillis: Long, dryRun: Boolean = false): List<Path> {
        if (!path.exists() || !path.isDirectory()) {
            return emptyList()
        }

        val now = System.currentTimeMillis()
        val removed = mutableListOf<Path>()

        Files.walk(path).use { stream ->
            stream.filter { it.isRegularFile() }.forEach { file ->
                try {
                    val ageMillis = now - Files.getLastModifiedTime(file).toMillis()
                    if (ageMillis > maxAgeMillis) {
                        if (!dryRun) {
                            Files.delete(file)
                        }
                        // Recorded only after the delete went through, so the result never
                        // lists a file that is still on disk.
                        removed.add(file)
                    }
                } catch (e: IOException) {
                    // Best effort: skip this file and keep cleaning the rest.
                }
            }
        }

        return removed
    }

    /**
     * Writes [content] to a temporary file beside [path] and then moves it over [path] with
     * `ATOMIC_MOVE`, so readers see either the old or the new file.
     *
     * The temporary file is created in the target's directory so that the move stays on one
     * file store. It is deleted again if writing or moving fails. Whether an existing target is
     * replaced by an atomic move is implementation specific according to the JDK documentation.
     *
     * @param path The target file path; a bare relative file name is resolved against the
     *   working directory to find its parent
     * @param content The bytes to write
     * @return The file path
     * @throws IOException if [path] has no file name or parent directory, or writing/moving fails
     */
    @Throws(IOException::class)
    fun writeAtomic(path: Path, content: ByteArray): Path {
        val fileName = path.fileName ?: throw IOException("Path has no file name: $path")
        // path.parent is null for a bare relative file name; go through the absolute form.
        val directory = path.toAbsolutePath().parent
            ?: throw IOException("Path has no parent directory: $path")

        val tempFile = Files.createTempFile(directory, ".tmp-", fileName.toString())
        try {
            Files.write(tempFile, content)
            Files.move(tempFile, path, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING)
        } catch (e: Exception) {
            // Clean up the temp file on failure; the original error is what the caller needs.
            try {
                Files.deleteIfExists(tempFile)
            } catch (cleanup: IOException) {
                // Ignore cleanup errors
            }
            throw e
        }
        return path
    }

    /**
     * Atomically writes text to a file; see the [ByteArray] overload for the mechanism.
     *
     * @param path The target file path
     * @param content The text to write, encoded as UTF-8
     * @return The file path
     * @throws IOException if writing or moving fails
     */
    @Throws(IOException::class)
    fun writeAtomic(path: Path, content: String): Path {
        return writeAtomic(path, content.toByteArray(Charsets.UTF_8))
    }

    /**
     * Checks whether a file has no content.
     *
     * @param path The file path
     * @return true if the file is zero bytes long, does not exist, or its size cannot be read
     */
    fun isEmpty(path: Path): Boolean {
        if (!path.exists()) {
            return true
        }

        return try {
            Files.size(path) == 0L
        } catch (e: IOException) {
            true
        }
    }

    /**
     * Returns the extension of the last path element.
     *
     * A leading dot does not start an extension (`.gitignore` has none) and a trailing dot
     * yields none either.
     *
     * @param path The file path
     * @return The extension without the dot, or an empty string if there is none or the path
     *   has no file name (for example a root path)
     */
    fun getExtension(path: Path): String {
        val fileName = path.fileName?.toString() ?: return ""
        val dotIndex = fileName.lastIndexOf('.')

        return if (dotIndex > 0 && dotIndex < fileName.length - 1) {
            fileName.substring(dotIndex + 1)
        } else {
            ""
        }
    }

    /**
     * Replaces, adds or removes the extension of the last path element.
     *
     * @param path The file path
     * @param newExtension The new extension (without dot); empty removes the extension
     * @return A sibling path with the changed file name
     * @throws IllegalArgumentException if [path] has no file name (for example a root path)
     */
    fun changeExtension(path: Path, newExtension: String): Path {
        val fileName = requireNotNull(path.fileName) { "Path has no file name: $path" }.toString()
        val dotIndex = fileName.lastIndexOf('.')

        // Same rule as getExtension: a dot at index 0 belongs to the name, not to an extension.
        val baseName = if (dotIndex > 0) fileName.substring(0, dotIndex) else fileName
        val newFileName = if (newExtension.isEmpty()) baseName else "$baseName.$newExtension"

        return path.resolveSibling(newFileName)
    }
}
/**
 * Unit tests for [FileSystemUtils]. Every test works below a JUnit-managed temporary
 * directory, so nothing outside of it is touched and cleanup is automatic.
 */
class FileSystemUtilsTest {

    @TempDir
    lateinit var tempDir: Path

    private lateinit var testDir: Path

    @BeforeEach
    fun setUp() {
        testDir = tempDir.resolve("test")
        Files.createDirectories(testDir)
    }

    @AfterEach
    fun tearDown() {
        // Cleanup is handled by @TempDir
    }

    @Test
    fun `deleteDirectoryRecursively deletes directory and contents`() {
        val subDir = testDir.resolve("subdir")
        Files.createDirectories(subDir)
        Files.writeString(subDir.resolve("file.txt"), "content")

        assertTrue(subDir.exists())
        assertTrue(FileSystemUtils.deleteDirectoryRecursively(subDir))
        assertFalse(subDir.exists())
    }

    @Test
    fun `deleteDirectoryRecursively returns true for non-existent directory`() {
        val nonExistent = testDir.resolve("does-not-exist")
        assertTrue(FileSystemUtils.deleteDirectoryRecursively(nonExistent))
    }

    @Test
    fun `deleteDirectoryRecursively throws for non-directory path`() {
        val file = testDir.resolve("file.txt")
        Files.writeString(file, "content")

        assertThrows(IllegalArgumentException::class.java) {
            FileSystemUtils.deleteDirectoryRecursively(file)
        }
    }

    @Test
    fun `copyDirectory copies all files and subdirectories`() {
        val source = testDir.resolve("source")
        val target = testDir.resolve("target")

        Files.createDirectories(source)
        Files.writeString(source.resolve("file1.txt"), "content1")

        val subDir = source.resolve("subdir")
        Files.createDirectories(subDir)
        Files.writeString(subDir.resolve("file2.txt"), "content2")

        FileSystemUtils.copyDirectory(source, target)

        assertTrue(target.exists())
        assertTrue(target.resolve("file1.txt").exists())
        assertTrue(target.resolve("subdir").exists())
        assertTrue(target.resolve("subdir/file2.txt").exists())
        assertEquals("content1", Files.readString(target.resolve("file1.txt")))
        assertEquals("content2", Files.readString(target.resolve("subdir/file2.txt")))
    }

    @Test
    fun `copyDirectory with overwrite replaces existing files`() {
        val source = testDir.resolve("source")
        val target = testDir.resolve("target")

        Files.createDirectories(source)
        Files.createDirectories(target)

        Files.writeString(source.resolve("file.txt"), "new content")
        Files.writeString(target.resolve("file.txt"), "old content")

        FileSystemUtils.copyDirectory(source, target, overwrite = true)

        assertEquals("new content", Files.readString(target.resolve("file.txt")))
    }

    @Test
    fun `isPathSafe returns true for path within base directory`() {
        val basePath = testDir
        val safePath = testDir.resolve("subdir/file.txt")

        assertTrue(FileSystemUtils.isPathSafe(basePath, safePath))
    }

    @Test
    fun `isPathSafe returns false for path outside base directory`() {
        val basePath = testDir.resolve("restricted")
        val unsafePath = testDir.resolve("outside/file.txt")

        assertFalse(FileSystemUtils.isPathSafe(basePath, unsafePath))
    }

    @Test
    fun `isPathSafe prevents path traversal attack`() {
        val basePath = testDir.resolve("safe")
        Files.createDirectories(basePath)
        val attackPath = basePath.resolve("../../../etc/passwd")

        assertFalse(FileSystemUtils.isPathSafe(basePath, attackPath))
    }

    @Test
    fun `resolveSafely resolves safe relative paths`() {
        val basePath = testDir
        val resolved = FileSystemUtils.resolveSafely(basePath, "subdir/file.txt")

        assertTrue(resolved.startsWith(basePath))
        assertEquals("file.txt", resolved.fileName.toString())
    }

    @Test
    fun `resolveSafely throws for paths that escape base directory`() {
        val basePath = testDir.resolve("restricted")
        Files.createDirectories(basePath)

        assertThrows(IllegalArgumentException::class.java) {
            FileSystemUtils.resolveSafely(basePath, "../../outside.txt")
        }
    }

    @Test
    fun `getDirectorySize calculates total size correctly`() {
        val dir = testDir.resolve("sizetest")
        Files.createDirectories(dir)

        Files.writeString(dir.resolve("file1.txt"), "12345") // 5 bytes
        Files.writeString(dir.resolve("file2.txt"), "1234567890") // 10 bytes

        val size = FileSystemUtils.getDirectorySize(dir)
        assertEquals(15L, size)
    }

    @Test
    fun `getDirectorySize returns zero for non-existent directory`() {
        val nonExistent = testDir.resolve("does-not-exist")
        assertEquals(0L, FileSystemUtils.getDirectorySize(nonExistent))
    }

    @Test
    fun `countFiles counts all files in directory`() {
        val dir = testDir.resolve("counttest")
        Files.createDirectories(dir)

        Files.writeString(dir.resolve("file1.txt"), "content")
        Files.writeString(dir.resolve("file2.txt"), "content")

        val subDir = dir.resolve("subdir")
        Files.createDirectories(subDir)
        Files.writeString(subDir.resolve("file3.txt"), "content")

        val count = FileSystemUtils.countFiles(dir)
        assertEquals(3L, count)
    }

    @Test
    fun `countFiles with predicate filters files`() {
        val dir = testDir.resolve("filtertest")
        Files.createDirectories(dir)

        Files.writeString(dir.resolve("file1.txt"), "content")
        Files.writeString(dir.resolve("file2.kt"), "content")
        Files.writeString(dir.resolve("file3.txt"), "content")

        val count = FileSystemUtils.countFiles(dir, predicate = { FileSystemUtils.getExtension(it) == "txt" })
        assertEquals(2L, count)
    }

    @Test
    fun `findFiles finds files matching glob pattern`() {
        val dir = testDir.resolve("findtest")
        Files.createDirectories(dir)

        Files.writeString(dir.resolve("file1.kt"), "content")
        Files.writeString(dir.resolve("file2.kt"), "content")
        Files.writeString(dir.resolve("file3.java"), "content")

        val ktFiles = FileSystemUtils.findFiles(dir, "*.kt")
        assertEquals(2, ktFiles.size)
        assertTrue(ktFiles.all { FileSystemUtils.getExtension(it) == "kt" })
    }

    @Test
    fun `ensureDirectory creates directory if not exists`() {
        val newDir = testDir.resolve("newdir")
        assertFalse(newDir.exists())

        FileSystemUtils.ensureDirectory(newDir)
        assertTrue(newDir.exists())
        assertTrue(newDir.isDirectory())
    }

    @Test
    fun `ensureDirectory does not throw if directory already exists`() {
        val existingDir = testDir.resolve("existing")
        Files.createDirectories(existingDir)

        assertDoesNotThrow {
            FileSystemUtils.ensureDirectory(existingDir)
        }
    }

    @Test
    fun `ensureDirectory throws if path is a file`() {
        val file = testDir.resolve("file.txt")
        Files.writeString(file, "content")

        // Pin the documented exception type; a bare Exception would also accept programming errors.
        assertThrows(java.io.IOException::class.java) {
            FileSystemUtils.ensureDirectory(file)
        }
    }

    @Test
    fun `createTempDirectory creates directory with prefix`() {
        // Lives in the system temp directory, outside @TempDir, so it is removed here by hand
        // even when an assertion fails.
        val created = FileSystemUtils.createTempDirectory("test-prefix-")
        try {
            assertTrue(created.exists())
            assertTrue(created.isDirectory())
            assertTrue(created.fileName.toString().startsWith("test-prefix-"))
        } finally {
            Files.deleteIfExists(created)
        }
    }

    @Test
    fun `cleanupOldFiles removes files older than max age`() {
        val dir = testDir.resolve("cleanup")
        Files.createDirectories(dir)

        val oldFile = dir.resolve("old.txt")
        val newFile = dir.resolve("new.txt")

        Files.writeString(oldFile, "content")
        Files.writeString(newFile, "content")

        // Set old file's last modified time to 2 hours ago
        val twoHoursAgo = System.currentTimeMillis() - 2L * 60 * 60 * 1000
        oldFile.toFile().setLastModified(twoHoursAgo)

        val deleted = FileSystemUtils.cleanupOldFiles(dir, 60L * 60 * 1000) // 1 hour

        assertEquals(1, deleted.size)
        assertEquals("old.txt", deleted[0].fileName.toString())
        // The report alone does not prove the effect on disk.
        assertFalse(oldFile.exists())
        assertTrue(newFile.exists())
    }

    @Test
    fun `cleanupOldFiles dry run does not delete files`() {
        val dir = testDir.resolve("dryrun")
        Files.createDirectories(dir)

        val oldFile = dir.resolve("old.txt")
        Files.writeString(oldFile, "content")

        val twoHoursAgo = System.currentTimeMillis() - 2L * 60 * 60 * 1000
        oldFile.toFile().setLastModified(twoHoursAgo)

        val reported = FileSystemUtils.cleanupOldFiles(dir, 60L * 60 * 1000, dryRun = true)

        assertEquals(1, reported.size)
        assertTrue(oldFile.exists())
    }

    @Test
    fun `writeAtomic writes content atomically`() {
        val file = testDir.resolve("atomic.txt")
        val content = "test content"

        FileSystemUtils.writeAtomic(file, content)

        assertTrue(file.exists())
        assertEquals(content, Files.readString(file))
    }

    @Test
    fun `writeAtomic with bytes writes content atomically`() {
        val file = testDir.resolve("atomic-bytes.txt")
        val content = "test content".toByteArray()

        FileSystemUtils.writeAtomic(file, content)

        assertTrue(file.exists())
        assertArrayEquals(content, Files.readAllBytes(file))
    }

    @Test
    fun `isEmpty returns true for empty file`() {
        val file = testDir.resolve("empty.txt")
        Files.writeString(file, "")

        assertTrue(FileSystemUtils.isEmpty(file))
    }

    @Test
    fun `isEmpty returns false for non-empty file`() {
        val file = testDir.resolve("nonempty.txt")
        Files.writeString(file, "content")

        assertFalse(FileSystemUtils.isEmpty(file))
    }

    @Test
    fun `isEmpty returns true for non-existent file`() {
        val file = testDir.resolve("does-not-exist.txt")
        assertTrue(FileSystemUtils.isEmpty(file))
    }

    @Test
    fun `getExtension returns correct extension`() {
        val file = testDir.resolve("document.pdf")
        assertEquals("pdf", FileSystemUtils.getExtension(file))
    }

    @Test
    fun `getExtension returns empty string for no extension`() {
        val file = testDir.resolve("noextension")
        assertEquals("", FileSystemUtils.getExtension(file))
    }

    @Test
    fun `getExtension handles multiple dots`() {
        val file = testDir.resolve("archive.tar.gz")
        assertEquals("gz", FileSystemUtils.getExtension(file))
    }

    @Test
    fun `changeExtension changes file extension`() {
        val file = testDir.resolve("document.txt")
        val newPath = FileSystemUtils.changeExtension(file, "pdf")

        assertEquals("document.pdf", newPath.fileName.toString())
    }

    @Test
    fun `changeExtension removes extension when new extension is empty`() {
        val file = testDir.resolve("document.txt")
        val newPath = FileSystemUtils.changeExtension(file, "")

        assertEquals("document", newPath.fileName.toString())
    }

    @Test
    fun `changeExtension adds extension to file without extension`() {
        val file = testDir.resolve("document")
        val newPath = FileSystemUtils.changeExtension(file, "txt")

        assertEquals("document.txt", newPath.fileName.toString())
    }
}
/**
 * Utility functions for DOM element operations: visibility and viewport checks, rectangle
 * geometry, interactivity heuristics, tree traversal and text extraction.
 */
object ElementUtils {

    /** Tags that [isInteractive] accepts without looking at attributes. */
    private val INTERACTIVE_TAGS = setOf("a", "button", "input", "select", "textarea")

    /** Values of the `role` attribute that [isInteractive] accepts; compared case-sensitively. */
    private val INTERACTIVE_ROLES = setOf("button", "link", "checkbox", "radio", "menuitem")

    /**
     * Checks whether an element is visible, judged by its computed styles and its bounds.
     *
     * An element counts as hidden when `display` is `none`, `visibility` is `hidden`, `opacity`
     * parses to a number that is not positive, or its rectangle has no positive width and
     * height. Missing styles are not treated as hidden; a missing snapshot or rectangle is.
     *
     * @param node The DOM node to check
     * @return true if the element is considered visible
     */
    fun isVisible(node: DOMTreeNodeEx): Boolean {
        // Only element nodes can be visible
        if (node.nodeType != NodeType.ELEMENT_NODE) {
            return false
        }

        val snapshot = node.snapshotNode ?: return false

        // Check computed styles for visibility
        val styles = snapshot.computedStyles
        if (styles != null && styles.isNotEmpty()) {
            val display = styles["display"]?.lowercase()
            val visibility = styles["visibility"]?.lowercase()
            val opacity = styles["opacity"]?.toDoubleOrNull()

            if (display == "none") return false
            if (visibility == "hidden") return false
            if (opacity != null && opacity <= 0.0) return false
        }

        // Check bounds - element must have non-zero dimensions.
        // NOTE(review): `clientRects` is read here as one rectangle despite the plural name - confirm.
        val clientRect = snapshot.clientRects ?: return false
        return clientRect.width > 0.0 && clientRect.height > 0.0
    }

    /**
     * Checks whether an element's rectangle intersects the viewport, taken as the area from
     * (0, 0) to ([viewportWidth], [viewportHeight]).
     *
     * @param node The DOM node to check
     * @param viewportWidth The viewport width
     * @param viewportHeight The viewport height
     * @return true if the element is at least partially inside the viewport; false if it is not
     *   or if no rectangle is available
     */
    fun isInViewport(node: DOMTreeNodeEx, viewportWidth: Double, viewportHeight: Double): Boolean {
        val snapshot = node.snapshotNode ?: return false
        val clientRect = snapshot.clientRects ?: return false

        // Element must be at least partially visible in viewport
        return clientRect.x < viewportWidth &&
            clientRect.y < viewportHeight &&
            clientRect.x + clientRect.width > 0.0 &&
            clientRect.y + clientRect.height > 0.0
    }

    /**
     * Checks whether two rectangles overlap. Rectangles that only share an edge or a corner
     * count as overlapping, because the separation test uses strict comparisons.
     *
     * @param rect1 First rectangle
     * @param rect2 Second rectangle
     * @return true if the rectangles overlap or touch
     */
    fun boundsOverlap(rect1: DOMRect, rect2: DOMRect): Boolean {
        val separated = rect1.x + rect1.width < rect2.x ||
            rect2.x + rect2.width < rect1.x ||
            rect1.y + rect1.height < rect2.y ||
            rect2.y + rect2.height < rect1.y
        return !separated
    }

    /**
     * Calculates the area shared by two rectangles.
     *
     * @param rect1 First rectangle
     * @param rect2 Second rectangle
     * @return Intersection area, or 0.0 if the rectangles do not overlap
     */
    fun intersectionArea(rect1: DOMRect, rect2: DOMRect): Double {
        if (!boundsOverlap(rect1, rect2)) {
            return 0.0
        }

        val left = maxOf(rect1.x, rect2.x)
        val top = maxOf(rect1.y, rect2.y)
        val right = minOf(rect1.x + rect1.width, rect2.x + rect2.width)
        val bottom = minOf(rect1.y + rect1.height, rect2.y + rect2.height)

        return (right - left) * (bottom - top)
    }

    /**
     * Checks whether an element is interactive, judged by its tag and attributes.
     *
     * An element qualifies through a known interactive tag, a non-empty `onclick` attribute,
     * an interactive `role`, or a `tabindex` that parses to a non-negative integer.
     *
     * @param node The DOM node to check
     * @return true if the element is considered interactive
     */
    fun isInteractive(node: DOMTreeNodeEx): Boolean {
        if (node.nodeType != NodeType.ELEMENT_NODE) {
            return false
        }

        // Known interactive tags
        if (node.nodeName.lowercase() in INTERACTIVE_TAGS) {
            return true
        }

        // Check for click handlers or role attributes
        if (!node.attributes["onclick"].isNullOrEmpty()) {
            return true
        }
        val role = node.attributes["role"]
        if (role != null && role in INTERACTIVE_ROLES) {
            return true
        }

        // Check tabindex
        val tabindex = node.attributes["tabindex"]?.toIntOrNull()
        return tabindex != null && tabindex >= 0
    }

    /**
     * Returns the center point of an element's rectangle.
     *
     * @param node The DOM node
     * @return Pair of (x, y) coordinates, or null if no rectangle is available
     */
    fun getCenter(node: DOMTreeNodeEx): Pair<Double, Double>? {
        val snapshot = node.snapshotNode ?: return null
        val clientRect = snapshot.clientRects ?: return null

        val x = clientRect.x + clientRect.width / 2.0
        val y = clientRect.y + clientRect.height / 2.0

        return Pair(x, y)
    }

    /**
     * Finds the first node in an ancestors list that satisfies a predicate.
     *
     * The list is searched in its given order, so which ancestor is "first" depends on how the
     * caller ordered it.
     *
     * @param ancestors The list of ancestor nodes
     * @param predicate The condition to match
     * @return The first matching ancestor or null
     */
    fun findAncestor(ancestors: List<DOMTreeNodeEx>, predicate: (DOMTreeNodeEx) -> Boolean): DOMTreeNodeEx? {
        return ancestors.firstOrNull(predicate)
    }

    /**
     * Collects all descendants of a node in depth-first pre-order. The node itself is not included.
     *
     * @param node The root node
     * @param maxDepth Maximum depth to traverse; direct children are at depth 1
     * @return List of all descendant nodes down to [maxDepth]
     */
    fun getDescendants(node: DOMTreeNodeEx, maxDepth: Int = Int.MAX_VALUE): List<DOMTreeNodeEx> {
        // Same traversal as findDescendants with a predicate that accepts every node.
        return findDescendants(node, { true }, maxDepth)
    }

    /**
     * Collects the descendants of a node that satisfy a predicate, in depth-first pre-order.
     *
     * A non-matching node does not prune the search: its children are still visited.
     *
     * @param node The root node
     * @param predicate The condition to match
     * @param maxDepth Maximum depth to traverse; direct children are at depth 1
     * @return List of matching descendants
     */
    fun findDescendants(
        node: DOMTreeNodeEx,
        predicate: (DOMTreeNodeEx) -> Boolean,
        maxDepth: Int = Int.MAX_VALUE
    ): List<DOMTreeNodeEx> {
        val matches = mutableListOf<DOMTreeNodeEx>()

        fun traverse(current: DOMTreeNodeEx, depth: Int) {
            if (depth >= maxDepth) return

            for (child in current.children) {
                if (predicate(child)) {
                    matches.add(child)
                }
                traverse(child, depth + 1)
            }
        }

        traverse(node, 0)
        return matches
    }

    /**
     * Checks whether a node or any of its descendants carries non-blank text.
     *
     * @param node The DOM node
     * @return true if the node is a text node with a non-blank value, or any descendant is
     */
    fun hasTextContent(node: DOMTreeNodeEx): Boolean {
        if (node.nodeType == NodeType.TEXT_NODE) {
            return !node.nodeValue.isNullOrBlank()
        }

        // Check children recursively
        return node.children.any { hasTextContent(it) }
    }

    /**
     * Extracts the text of a node and its descendants.
     *
     * Text-node values are trimmed and joined with single spaces in document order. Only
     * element nodes are descended into. Visibility is not taken into account.
     *
     * @param node The DOM node
     * @param maxLength Maximum length of text to extract
     * @return Extracted text, cut to [maxLength] characters and trimmed
     */
    fun getTextContent(node: DOMTreeNodeEx, maxLength: Int = Int.MAX_VALUE): String {
        val text = StringBuilder()

        fun extractText(current: DOMTreeNodeEx) {
            // Stop collecting once enough text is buffered; the final cut happens below.
            if (text.length >= maxLength) return

            if (current.nodeType == NodeType.TEXT_NODE) {
                val value = current.nodeValue?.trim()
                if (!value.isNullOrEmpty()) {
                    if (text.isNotEmpty()) {
                        text.append(" ")
                    }
                    text.append(value)
                }
            } else if (current.nodeType == NodeType.ELEMENT_NODE) {
                for (child in current.children) {
                    extractText(child)
                }
            }
        }

        extractText(node)
        return text.toString().take(maxLength).trim()
    }
}
org.junit.jupiter.api.Assertions.* +import org.junit.jupiter.api.Test + +class ElementUtilsTest { + + @Test + fun `isVisible returns false for non-element nodes`() { + val textNode = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "Hello" + ) + assertFalse(ElementUtils.isVisible(textNode)) + } + + @Test + fun `isVisible returns false for element with display none`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + computedStyles = mapOf("display" to "none"), + clientRects = DOMRect(0.0, 0.0, 100.0, 50.0) + ) + ) + assertFalse(ElementUtils.isVisible(node)) + } + + @Test + fun `isVisible returns false for element with visibility hidden`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + computedStyles = mapOf("visibility" to "hidden"), + clientRects = DOMRect(0.0, 0.0, 100.0, 50.0) + ) + ) + assertFalse(ElementUtils.isVisible(node)) + } + + @Test + fun `isVisible returns false for element with zero opacity`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + computedStyles = mapOf("opacity" to "0"), + clientRects = DOMRect(0.0, 0.0, 100.0, 50.0) + ) + ) + assertFalse(ElementUtils.isVisible(node)) + } + + @Test + fun `isVisible returns false for element with zero dimensions`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + computedStyles = mapOf("display" to "block"), + clientRects = DOMRect(0.0, 0.0, 0.0, 0.0) + ) + ) + assertFalse(ElementUtils.isVisible(node)) + } + + @Test + fun `isVisible returns true for visible element`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + computedStyles = mapOf("display" to "block", "visibility" to "visible"), + clientRects = DOMRect(10.0, 20.0, 100.0, 50.0) + ) + ) + assertTrue(ElementUtils.isVisible(node)) + } + + @Test + fun `isInViewport returns true for element within viewport`() { + val node = DOMTreeNodeEx( + 
nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + clientRects = DOMRect(100.0, 100.0, 200.0, 150.0) + ) + ) + assertTrue(ElementUtils.isInViewport(node, 1920.0, 1080.0)) + } + + @Test + fun `isInViewport returns false for element outside viewport`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + clientRects = DOMRect(2000.0, 2000.0, 100.0, 100.0) + ) + ) + assertFalse(ElementUtils.isInViewport(node, 1920.0, 1080.0)) + } + + @Test + fun `boundsOverlap returns true for overlapping rectangles`() { + val rect1 = DOMRect(0.0, 0.0, 100.0, 100.0) + val rect2 = DOMRect(50.0, 50.0, 100.0, 100.0) + assertTrue(ElementUtils.boundsOverlap(rect1, rect2)) + } + + @Test + fun `boundsOverlap returns false for non-overlapping rectangles`() { + val rect1 = DOMRect(0.0, 0.0, 50.0, 50.0) + val rect2 = DOMRect(100.0, 100.0, 50.0, 50.0) + assertFalse(ElementUtils.boundsOverlap(rect1, rect2)) + } + + @Test + fun `intersectionArea calculates correct overlap area`() { + val rect1 = DOMRect(0.0, 0.0, 100.0, 100.0) + val rect2 = DOMRect(50.0, 50.0, 100.0, 100.0) + val area = ElementUtils.intersectionArea(rect1, rect2) + assertEquals(2500.0, area, 0.01) + } + + @Test + fun `intersectionArea returns zero for non-overlapping rectangles`() { + val rect1 = DOMRect(0.0, 0.0, 50.0, 50.0) + val rect2 = DOMRect(100.0, 100.0, 50.0, 50.0) + val area = ElementUtils.intersectionArea(rect1, rect2) + assertEquals(0.0, area, 0.01) + } + + @Test + fun `isInteractive returns true for button`() { + val node = DOMTreeNodeEx(nodeName = "button") + assertTrue(ElementUtils.isInteractive(node)) + } + + @Test + fun `isInteractive returns true for element with onclick`() { + val node = DOMTreeNodeEx( + nodeName = "div", + attributes = mapOf("onclick" to "handleClick()") + ) + assertTrue(ElementUtils.isInteractive(node)) + } + + @Test + fun `isInteractive returns true for element with button role`() { + val node = DOMTreeNodeEx( + nodeName = "div", + attributes = mapOf("role" to 
"button") + ) + assertTrue(ElementUtils.isInteractive(node)) + } + + @Test + fun `isInteractive returns true for element with non-negative tabindex`() { + val node = DOMTreeNodeEx( + nodeName = "div", + attributes = mapOf("tabindex" to "0") + ) + assertTrue(ElementUtils.isInteractive(node)) + } + + @Test + fun `isInteractive returns false for plain div`() { + val node = DOMTreeNodeEx(nodeName = "div") + assertFalse(ElementUtils.isInteractive(node)) + } + + @Test + fun `getCenter returns correct center coordinates`() { + val node = DOMTreeNodeEx( + nodeName = "DIV", + snapshotNode = SnapshotNodeEx( + clientRects = DOMRect(100.0, 200.0, 50.0, 30.0) + ) + ) + val center = ElementUtils.getCenter(node) + assertNotNull(center) + assertEquals(125.0, center!!.first, 0.01) + assertEquals(215.0, center.second, 0.01) + } + + @Test + fun `findAncestor returns first matching ancestor`() { + val grandparent = DOMTreeNodeEx( + nodeName = "BODY", + attributes = mapOf("class" to "container") + ) + val parent = DOMTreeNodeEx( + nodeName = "DIV" + ) + val child = DOMTreeNodeEx( + nodeName = "SPAN" + ) + + // Ancestors list: from immediate parent to root + val ancestors = listOf(parent, grandparent) + + val found = ElementUtils.findAncestor(ancestors) { it.nodeName == "BODY" } + assertNotNull(found) + assertEquals("BODY", found?.nodeName) + } + + @Test + fun `findAncestor returns null if no match`() { + val parent = DOMTreeNodeEx(nodeName = "DIV") + val ancestors = listOf(parent) + + val found = ElementUtils.findAncestor(ancestors) { it.nodeName == "TABLE" } + assertNull(found) + } + + @Test + fun `getDescendants returns all descendants`() { + val child1 = DOMTreeNodeEx(nodeName = "SPAN") + val child2 = DOMTreeNodeEx(nodeName = "A") + val parent = DOMTreeNodeEx( + nodeName = "DIV", + children = listOf(child1, child2) + ) + + val descendants = ElementUtils.getDescendants(parent) + assertEquals(2, descendants.size) + } + + @Test + fun `findDescendants returns matching descendants`() { + 
val span = DOMTreeNodeEx(nodeName = "SPAN") + val link = DOMTreeNodeEx(nodeName = "A") + val div = DOMTreeNodeEx(nodeName = "DIV") + val parent = DOMTreeNodeEx( + nodeName = "SECTION", + children = listOf(span, link, div) + ) + + val found = ElementUtils.findDescendants(parent, predicate = { it.nodeName == "A" }) + assertEquals(1, found.size) + assertEquals("A", found[0].nodeName) + } + + @Test + fun `hasTextContent returns true for text node`() { + val textNode = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "Hello" + ) + assertTrue(ElementUtils.hasTextContent(textNode)) + } + + @Test + fun `hasTextContent returns false for blank text node`() { + val textNode = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = " " + ) + assertFalse(ElementUtils.hasTextContent(textNode)) + } + + @Test + fun `hasTextContent returns true for element with text children`() { + val textNode = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "Content" + ) + val element = DOMTreeNodeEx( + nodeName = "DIV", + children = listOf(textNode) + ) + assertTrue(ElementUtils.hasTextContent(element)) + } + + @Test + fun `getTextContent extracts text from node and descendants`() { + val text1 = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "Hello" + ) + val text2 = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "World" + ) + val span = DOMTreeNodeEx( + nodeName = "SPAN", + children = listOf(text2) + ) + val div = DOMTreeNodeEx( + nodeName = "DIV", + children = listOf(text1, span) + ) + + val text = ElementUtils.getTextContent(div) + assertTrue(text.contains("Hello")) + assertTrue(text.contains("World")) + } + + @Test + fun `getTextContent respects maxLength`() { + val text1 = DOMTreeNodeEx( + nodeType = NodeType.TEXT_NODE, + nodeName = "#text", + nodeValue = "Hello World This Is A Long Text" + ) + val div = DOMTreeNodeEx( 
+ nodeName = "DIV", + children = listOf(text1) + ) + + val text = ElementUtils.getTextContent(div, maxLength = 10) + assertTrue(text.length <= 10) + } +}