diff --git a/Sources/SWBCore/Settings/RecursiveSearchPathResolver.swift b/Sources/SWBCore/Settings/RecursiveSearchPathResolver.swift index 08368fc1..69fa4523 100644 --- a/Sources/SWBCore/Settings/RecursiveSearchPathResolver.swift +++ b/Sources/SWBCore/Settings/RecursiveSearchPathResolver.swift @@ -202,7 +202,7 @@ public final class RecursiveSearchPathResolver: Sendable { result.append(Path(".")) } else if sourcePath.isRoot { result.append(Path(String(path.str[path.str.utf8.index(after: path.str.utf8.startIndex)...]))) - } else if path.str.hasPrefix(sourcePath.str) && Path.pathSeparatorsUTF8.contains(path.str.utf8[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count)]) { + } else if path.str.hasPrefix(sourcePath.str) && Path.isUTF8PathSeparator(path.str.utf8[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count)]) { // FIXME: Use dropFirst() once available everywhere. result.append(Path(String(path.str[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count + 1)...]))) } else { diff --git a/Sources/SWBUtil/Path.swift b/Sources/SWBUtil/Path.swift index 28294f29..0acc3064 100644 --- a/Sources/SWBUtil/Path.swift +++ b/Sources/SWBUtil/Path.swift @@ -69,18 +69,45 @@ public struct Path: Serializable, Sendable { /// The system path separator. #if os(Windows) public static let pathSeparator = Character("\\") - public static let pathSeparatorUTF8 = UInt8(ascii: "\\") - public static let pathSeparatorsUTF8 = Set([UInt8(ascii: "\\"), UInt8(ascii: "/")]) + @inline(__always) public static var pathSeparatorUTF8: UInt8 { UInt8(ascii: "\\") } public static let pathEnvironmentSeparator = Character(";") - public static let pathSeparators = Set("\\/") + @inline(__always) public static func isUTF8PathSeparator(_ char: UInt8, separators: (some Collection)? = ([Character]?).none) -> Bool { + guard let separators else { + return char == pathSeparatorUTF8 || char == UInt8(ascii: "/") + } + // This is a bit inefficient, but separators should always be nil outside of tests + return separators.contains(String(decoding: CollectionOfOne(char), as: UTF8.self)) + } + @inline(__always) public static func firstPathSeparatorIndex(in str: some StringProtocol, separators: (some Collection)?) -> String.Index? { + guard let separators else { + return str.utf8.firstIndex(where: { Path.isUTF8PathSeparator($0, separators: separators) }) + } + return str.firstIndex(where: { separators.contains($0) }) + } #else public static let pathSeparator = Character("/") - public static let pathSeparatorUTF8 = UInt8(ascii: "/") - public static let pathSeparatorsUTF8 = Set([UInt8(ascii: "/")]) + @inline(__always) public static var pathSeparatorUTF8: UInt8 { UInt8(ascii: "/") } public static let pathEnvironmentSeparator = Character(":") - public static let pathSeparators = Set([Character("/")]) + @inline(__always) public static func isUTF8PathSeparator(_ char: UInt8, separators: (some Collection)? = ([Character]?).none) -> Bool { + guard let separators else { + return char == pathSeparatorUTF8 + } + // This is a bit inefficient, but separators should always be nil outside of tests + return separators.contains(String(decoding: CollectionOfOne(char), as: UTF8.self)) + } + @inline(__always) public static func firstPathSeparatorIndex(in str: some StringProtocol, separators: (some Collection)?) -> String.Index? { + guard let separators else { + return str.utf8.index(of: pathSeparatorUTF8) + } + return str.firstIndex(where: { separators.contains($0) }) + } #endif + @inline(__always) public static func isPathSeparator(_ char: Character, separators: (some Collection)?) -> Bool { + guard let c = char.utf8.first else { return false } + return isUTF8PathSeparator(c, separators: separators) + } + /// The system path separator, as a string. public static let pathSeparatorString = String(pathSeparator) @@ -717,9 +744,10 @@ public struct Path: Serializable, Sendable { var numComponents = 0 var isInPathComponent = false var nextCharacterIsEscaped = false - for idx in pattern.indices { + for byte in pattern.utf8 { // Skip over path separators, unless they're escaped. - if pattern[idx] == Path.pathSeparator { + //TODO: should this (and other similar uses) be Path.isUTF8PathSeparator(byte) instead for Windows? + if byte == Path.pathSeparatorUTF8 { if !nextCharacterIsEscaped { isInPathComponent = false } @@ -736,7 +764,7 @@ public struct Path: Serializable, Sendable { nextCharacterIsEscaped = false } else { - nextCharacterIsEscaped = (pattern[idx] == Character("\\")) + nextCharacterIsEscaped = (byte == UInt8(ascii: "\\")) } } return numComponents @@ -746,19 +774,20 @@ public struct Path: Serializable, Sendable { var numPathComponentsInPath = 0 var isInPathComponent = false var firstIdx: String.Index? - for idx in self.str.indices.reversed() { + let utf8Str = self.str.utf8 + for idx in utf8Str.indices.reversed() { // Skip over path separators. We ignore backslashes here, since paths don't have escape characters. - if self.str[idx] == Path.pathSeparator { + if utf8Str[idx] == Path.pathSeparatorUTF8 { isInPathComponent = false // If we've found the expected number of path components, then we stop, and record the index of the first character we want to match against. if numPathComponentsInPath == numPathComponentsInPattern { - if idx != self.str.endIndex { - firstIdx = self.str.index(after: idx) + if idx != utf8Str.endIndex { + firstIdx = utf8Str.index(after: idx) } break } } - else if idx == self.str.startIndex { + else if idx == utf8Str.startIndex { // If we didn't encounter a path separator, then the full string is the trailing subpath. firstIdx = idx break @@ -781,7 +810,7 @@ public struct Path: Serializable, Sendable { } // Create a string from the first index we found to the end of the path. - let trailingSubpath = String(self.str[first..) -> String.Index? { - if characters.isEmpty { - return nil - } - return firstIndex(where: {characters.contains($0)}) - } -} - /// Multi-platform fnmatch implementation. This is intended to be a close match the the POSIX fnmatch of all platforms including Windows (though not all options are supported). /// /// - parameter pattern: The pattern to match. When using the ``FnmatchOptions/pathname`` option, any path representation in the pattern is expected to use the POSIX path separator (`/`) to match with the input, and on Windows, the path separator (`/`) will be matched to either separator in the input string ( both `/` and `\` will be matched). @@ -54,7 +44,7 @@ private extension StringProtocol { /// - returns: `true` if the pattern matches the input, `false` otherwise. /// /// - note: On Windows and when using the ``FnmatchOptions/pathname`` option, both separators (`/` and `\`) are recognized (see note on pattern parameter). -public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, options: FnmatchOptions = .default, pathSeparators: Set = Path.pathSeparators) throws +public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, options: FnmatchOptions = .default, pathSeparators: (some Collection)? = ([Character]?).none) throws -> Bool { // Use Substrings to avoid String allocations @@ -76,32 +66,32 @@ public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, op return false } case "?": - guard let _sc = input.first else { + guard let _sc = input.utf8.first else { return false } - if options.contains(.pathname) && pathSeparators.contains(_sc) { + if options.contains(.pathname) && Path.isUTF8PathSeparator(_sc, separators: pathSeparators) { if backtrack() { return false } } input = input.dropFirst() case "*": - var p = pattern.first - while pattern.first == "*" { + var p = pattern.utf8.first + while pattern.utf8.first == UInt8(ascii: "*") { // consume multiple '*' in pattern pattern = pattern.dropFirst() - p = pattern.first + p = pattern.utf8.first } if p == nil { if options.contains(.pathname) { // make sure input does not have any more path separators - return input.firstIndex(matching: pathSeparators) == nil ? true : false + return Path.firstPathSeparatorIndex(in: input, separators: pathSeparators) == nil } else { return true // pattern matched everything else in input } - } else if pattern.first == "/" && options.contains(.pathname) { + } else if p == UInt8(ascii: "/") && options.contains(.pathname) { // we have a '*/' pattern input must have an path separators to continue - guard let newInputIndex = input.firstIndex(matching: pathSeparators) else { + guard let newInputIndex = Path.firstPathSeparatorIndex(in: input, separators: pathSeparators) else { return false } input.removeSubrange(..) -> RangeStatus { +private func rangematch(pattern: inout Substring, input: inout Substring, test: Character, options: FnmatchOptions, pathSeparators: (some Collection)? = ([Character]?).none) -> RangeStatus { var test = test - if !pattern.contains("]") { + if !pattern.utf8.contains(UInt8(ascii: "]")) { // unmatched '[' test as literal '[' return "[" == test ? .match : .noMatch } - let negate = pattern.first == "!" + let negate = pattern.utf8.first == UInt8(ascii: "!") if negate { pattern = pattern.dropFirst() } @@ -198,13 +188,13 @@ private func rangematch(pattern: inout Substring, input: inout Substring, test: if c == "]" { break } - if options.contains(.pathname) && pathSeparators.contains(c) { + if options.contains(.pathname) && Path.isPathSeparator(c, separators: pathSeparators) { return .noMatch } if options.contains(.caseInsensitive) { c = Character(c.lowercased()) } - if pattern.first == "-" { + if pattern.utf8.first == UInt8(ascii: "-") { let subPattern = pattern.dropFirst() if var c2 = subPattern.first { if c2 != "]" { diff --git a/Tests/SWBUtilTests/FnmatchTests.swift b/Tests/SWBUtilTests/FnmatchTests.swift index d8f68df4..fc388083 100644 --- a/Tests/SWBUtilTests/FnmatchTests.swift +++ b/Tests/SWBUtilTests/FnmatchTests.swift @@ -247,7 +247,7 @@ import SWBUtil @Test(arguments: [true, false]) func pathnameMatch(isWindows: Bool) throws { - let separators = isWindows ? Set("\\/") : Set([Character("/")]) + let separators = isWindows ? "\\/" : "/" try assertFnmatch(pattern: "x?y", input: "x/y", separators: separators) try assertFnmatch(pattern: "x?y", input: "x/y", shouldMatch: false, options: [.pathname], separators: separators) @@ -272,7 +272,7 @@ import SWBUtil } func assertFnmatch( - pattern: String, input: String, shouldMatch: Bool = true, options: FnmatchOptions = .default, separators: Set = Path.pathSeparators, sourceLocation: SourceLocation = #_sourceLocation) throws { + pattern: String, input: String, shouldMatch: Bool = true, options: FnmatchOptions = .default, separators: (some Collection)? = ([Character]?).none, sourceLocation: SourceLocation = #_sourceLocation) throws { let comment = Comment(stringLiteral: "\(pattern) \(shouldMatch ? "should" : "should not") match \(input)") let result = try fnmatch(pattern: pattern, input: input, options: options, pathSeparators: separators) shouldMatch ? #expect(result, comment, sourceLocation: sourceLocation) : #expect(!result, comment, sourceLocation: sourceLocation)