From 0e0c8f396d59d24b043287a177d4b9d5236f1bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20R=C3=B6nnqvist?= Date: Tue, 4 Nov 2025 12:15:53 +0100 Subject: [PATCH 1/5] Use Swift Concurrency in context initialization --- .../Infrastructure/DocumentationContext.swift | 123 ++++++------------ 1 file changed, 41 insertions(+), 82 deletions(-) diff --git a/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift b/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift index 9d4d83216b..1b55f158e9 100644 --- a/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift +++ b/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift @@ -218,7 +218,7 @@ public class DocumentationContext { self.linkResolver = LinkResolver(dataProvider: dataProvider) ResolvedTopicReference.enableReferenceCaching(for: inputs.id) - try register() + try await register() } /// Perform semantic analysis on a given `document` at a given `source` location and append any problems found to `problems`. @@ -1931,7 +1931,7 @@ public class DocumentationContext { /** Register a documentation bundle with this context. */ - private func register() throws { + private func register() async throws { try shouldContinueRegistration() let currentFeatureFlags: FeatureFlags? @@ -1963,111 +1963,61 @@ public class DocumentationContext { } } - // Note: Each bundle is registered and processed separately. - // Documents and symbols may both reference each other so the bundle is registered in 4 steps - - // In the bundle discovery phase all tasks run in parallel as they don't depend on each other. - let discoveryGroup = DispatchGroup() - let discoveryQueue = DispatchQueue(label: "org.swift.docc.Discovery", qos: .unspecified, attributes: .concurrent, autoreleaseFrequency: .workItem) - - let discoveryError = Synchronized<(any Error)?>(nil) + // Documents and symbols may both reference each other so the inputs is registered in 4 steps - // Load all bundle symbol graphs into the loader. - var symbolGraphLoader: SymbolGraphLoader! - var hierarchyBasedResolver: PathHierarchyBasedLinkResolver! - - discoveryGroup.async(queue: discoveryQueue) { [unowned self] in - symbolGraphLoader = SymbolGraphLoader( + // Load symbol information and construct data structures that only rely on symbol information. + async let loadSymbols = { [signposter, inputs, dataProvider, configuration] in + var symbolGraphLoader = SymbolGraphLoader( bundle: inputs, dataProvider: dataProvider, symbolGraphTransformer: configuration.convertServiceConfiguration.symbolGraphTransformer ) - do { - try signposter.withIntervalSignpost("Load symbols", id: signposter.makeSignpostID()) { + try signposter.withIntervalSignpost("Load symbols", id: signposter.makeSignpostID()) { + try autoreleasepool { try symbolGraphLoader.loadAll() } - hierarchyBasedResolver = signposter.withIntervalSignpost("Build PathHierarchy", id: signposter.makeSignpostID()) { + } + try shouldContinueRegistration() + let hierarchyBasedResolver = signposter.withIntervalSignpost("Build PathHierarchy", id: signposter.makeSignpostID()) { + autoreleasepool { PathHierarchyBasedLinkResolver(pathHierarchy: PathHierarchy( symbolGraphLoader: symbolGraphLoader, bundleName: urlReadablePath(inputs.displayName), knownDisambiguatedPathComponents: configuration.convertServiceConfiguration.knownDisambiguatedSymbolPathComponents )) } - - self.snippetResolver = SnippetResolver(symbolGraphLoader: symbolGraphLoader) - } catch { - // Pipe the error out of the dispatch queue. - discoveryError.sync({ - if $0 == nil { $0 = error } - }) } - } + + let snippetResolver = SnippetResolver(symbolGraphLoader: symbolGraphLoader) + + return (symbolGraphLoader, hierarchyBasedResolver, snippetResolver) + }() - // First, all the resources are added since they don't reference anything else. - discoveryGroup.async(queue: discoveryQueue) { [unowned self] in - do { - try signposter.withIntervalSignpost("Load resources", id: signposter.makeSignpostID()) { - try self.registerMiscResources() - } - } catch { - // Pipe the error out of the dispatch queue. - discoveryError.sync({ - if $0 == nil { $0 = error } - }) + // Load resources like images and videos + async let loadResources: Void = try signposter.withIntervalSignpost("Load resources", id: signposter.makeSignpostID()) { + try autoreleasepool { + try self.registerMiscResources() } } - // Second, all the documents and symbols are added. - // - // Note: Documents and symbols may look up resources at this point but shouldn't lookup other documents or - // symbols or attempt to resolve links/references since the topic graph may not contain all documents - // or all symbols yet. - var result: ( - tutorialTableOfContentsResults: [SemanticResult], - tutorials: [SemanticResult], - tutorialArticles: [SemanticResult], - articles: [SemanticResult
], - documentationExtensions: [SemanticResult
] - )! - - discoveryGroup.async(queue: discoveryQueue) { [unowned self] in - do { - result = try signposter.withIntervalSignpost("Load documents", id: signposter.makeSignpostID()) { - try self.registerDocuments() - } - } catch { - // Pipe the error out of the dispatch queue. - discoveryError.sync({ - if $0 == nil { $0 = error } - }) + // Load documents + async let loadDocuments = try signposter.withIntervalSignpost("Load documents", id: signposter.makeSignpostID()) { + try autoreleasepool { + try self.registerDocuments() } } - discoveryGroup.async(queue: discoveryQueue) { [unowned self] in - do { - try signposter.withIntervalSignpost("Load external resolvers", id: signposter.makeSignpostID()) { - try linkResolver.loadExternalResolvers(dependencyArchives: configuration.externalDocumentationConfiguration.dependencyArchives) - } - } catch { - // Pipe the error out of the dispatch queue. - discoveryError.sync({ - if $0 == nil { $0 = error } - }) + // Load any external resolvers + async let loadExternalResolvers: Void = try signposter.withIntervalSignpost("Load external resolvers", id: signposter.makeSignpostID()) { + try autoreleasepool { + try linkResolver.loadExternalResolvers(dependencyArchives: configuration.externalDocumentationConfiguration.dependencyArchives) } } - discoveryGroup.wait() - - try shouldContinueRegistration() - - // Re-throw discovery errors - if let encounteredError = discoveryError.sync({ $0 }) { - throw encounteredError - } - // All discovery went well, process the inputs. - let (tutorialTableOfContentsResults, tutorials, tutorialArticles, allArticles, documentationExtensions) = result + let (tutorialTableOfContentsResults, tutorials, tutorialArticles, allArticles, documentationExtensions) = try await loadDocuments + try shouldContinueRegistration() var (otherArticles, rootPageArticles) = splitArticles(allArticles) let globalOptions = (allArticles + documentationExtensions).compactMap { article in @@ -2107,7 +2057,10 @@ public class DocumentationContext { options = globalOptions.first } + let (symbolGraphLoader, hierarchyBasedResolver, snippetResolver) = try await loadSymbols + try shouldContinueRegistration() self.linkResolver.localResolver = hierarchyBasedResolver + self.snippetResolver = snippetResolver hierarchyBasedResolver.addMappingForRoots(bundle: inputs) for tutorial in tutorials { hierarchyBasedResolver.addTutorial(tutorial) @@ -2120,9 +2073,10 @@ public class DocumentationContext { } registerRootPages(from: rootPageArticles) + try registerSymbols(symbolGraphLoader: symbolGraphLoader, documentationExtensions: documentationExtensions) // We don't need to keep the loader in memory after we've registered all symbols. - symbolGraphLoader = nil + _ = consume symbolGraphLoader try shouldContinueRegistration() @@ -2149,12 +2103,17 @@ public class DocumentationContext { try shouldContinueRegistration() } + _ = try await loadExternalResolvers + // Third, any processing that relies on resolving other content is done, mainly resolving links. preResolveExternalLinks(semanticObjects: tutorialTableOfContentsResults.map(referencedSemanticObject) + tutorials.map(referencedSemanticObject) + tutorialArticles.map(referencedSemanticObject)) + // References to resources aren't used until the links are resolved + _ = try await loadResources + resolveLinks( tutorialTableOfContents: tutorialTableOfContentsResults, tutorials: tutorials, From a6fc1ecb11fe79be96f4d74103d473592dd6d0bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20R=C3=B6nnqvist?= Date: Tue, 4 Nov 2025 15:34:25 +0100 Subject: [PATCH 2/5] Use Swift Concurrency for convert action rendering --- .../ConvertActionConverter.swift | 197 ++++++++---------- .../Actions/Convert/ConvertAction.swift | 23 +- ...recatedDiagnosticsDigestWarningTests.swift | 6 +- .../TestRenderNodeOutputConsumer.swift | 2 +- .../MergeActionTests.swift | 3 +- 5 files changed, 101 insertions(+), 130 deletions(-) diff --git a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift index 1673f7b33d..14a0f29b44 100644 --- a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift +++ b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift @@ -29,14 +29,13 @@ package enum ConvertActionConverter { /// - sourceRepository: The source repository where the documentation's sources are hosted. /// - emitDigest: Whether the conversion should pass additional metadata output––such as linkable entities information, indexing information, or asset references by asset type––to the consumer. /// - documentationCoverageOptions: The level of experimental documentation coverage information that the conversion should pass to the consumer. - /// - Returns: A list of problems that occurred during the conversion (excluding the problems that the context already encountered). package static func convert( context: DocumentationContext, outputConsumer: some ConvertOutputConsumer & ExternalNodeConsumer, sourceRepository: SourceRepository?, emitDigest: Bool, documentationCoverageOptions: DocumentationCoverageOptions - ) throws -> [Problem] { + ) async throws { let signposter = Self.signposter defer { @@ -54,7 +53,7 @@ package enum ConvertActionConverter { if emitDigest { try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems) } - return [] + return } // Precompute the render context @@ -71,36 +70,7 @@ package enum ConvertActionConverter { renderContext: renderContext, sourceRepository: sourceRepository ) - - // Arrays to gather additional metadata if `emitDigest` is `true`. - var indexingRecords = [IndexingRecord]() - var linkSummaries = [LinkDestinationSummary]() - var assets = [RenderReferenceType : [any RenderReference]]() - var coverageInfo = [CoverageDataEntry]() - let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure() - - // An inner function to gather problems for errors encountered during the conversion. - // - // These problems only represent unexpected thrown errors and aren't particularly user-facing. - // For now we emit them as diagnostics because `DocumentationConverter.convert(outputConsumer:)` (which this replaced) used to do that. - // - // FIXME: In the future we could simplify this control flow by not catching these errors and turning them into diagnostics. - // Since both error-level diagnostics and thrown errors fail the documentation build, - // the only practical different this would have is that we stop on the first unexpected error instead of processing all pages and gathering all unexpected errors. - func recordProblem(from error: any Swift.Error, in problems: inout [Problem], withIdentifier identifier: String) { - let problem = Problem(diagnostic: Diagnostic( - severity: .error, - identifier: "org.swift.docc.documentation-converter.\(identifier)", - summary: error.localizedDescription - ), possibleSolutions: []) - - context.diagnosticEngine.emit(problem) - problems.append(problem) - } - - let resultsSyncQueue = DispatchQueue(label: "Convert Serial Queue", qos: .unspecified, attributes: []) - let resultsGroup = DispatchGroup() - + // Consume external links and add them into the sidebar. for externalLink in context.externalCache { // Here we're associating the external node with the **current** bundle's bundle ID. @@ -112,112 +82,112 @@ package enum ConvertActionConverter { let renderSignpostHandle = signposter.beginInterval("Render", id: signposter.makeSignpostID(), "Render \(context.knownPages.count) pages") - var conversionProblems: [Problem] = context.knownPages.concurrentPerform { identifier, results in - // If cancelled skip all concurrent conversion work in this block. - guard !Task.isCancelled else { return } + // Render all pages and gather their supplementary "digest" information if enabled. + let supplementaryRenderInfo = try await withThrowingTaskGroup(of: SupplementaryRenderInformation.self) { taskGroup in + let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure() + // Iterate over all the known pages in chunks + var remaining = context.knownPages[...] - // Wrap JSON encoding in an autorelease pool to avoid retaining the autoreleased ObjC objects returned by `JSONSerialization` - autoreleasepool { - do { - let entity = try context.entity(with: identifier) - - guard let renderNode = converter.renderNode(for: entity) else { - // No render node was produced for this entity, so just skip it. - return - } + let numberOfBatches = ProcessInfo.processInfo.processorCount * 4 + let numberOfElementsPerTask = Int(Double(remaining.count) / Double(numberOfBatches) + 1) + + while !remaining.isEmpty { + let slice = remaining.prefix(numberOfElementsPerTask) + remaining = remaining.dropFirst(numberOfElementsPerTask) + + // Start work of one slice of the known pages + taskGroup.addTask { + var supplementaryRenderInfo = SupplementaryRenderInformation() - try outputConsumer.consume(renderNode: renderNode) + for identifier in slice { + try autoreleasepool { + let entity = try context.entity(with: identifier) + + guard let renderNode = converter.renderNode(for: entity) else { + // No render node was produced for this entity, so just skip it. + return + } + + try outputConsumer.consume(renderNode: renderNode) - switch documentationCoverageOptions.level { - case .detailed, .brief: - let coverageEntry = try CoverageDataEntry( - documentationNode: entity, - renderNode: renderNode, - context: context - ) - if coverageFilterClosure(coverageEntry) { - resultsGroup.async(queue: resultsSyncQueue) { - coverageInfo.append(coverageEntry) + switch documentationCoverageOptions.level { + case .detailed, .brief: + let coverageEntry = try CoverageDataEntry( + documentationNode: entity, + renderNode: renderNode, + context: context + ) + if coverageFilterClosure(coverageEntry) { + supplementaryRenderInfo.coverageInfo.append(coverageEntry) + } + case .none: + break + } + + if emitDigest { + let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true) + let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier) + + supplementaryRenderInfo.assets.merge(renderNode.assetReferences, uniquingKeysWith: +) + supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) + supplementaryRenderInfo.indexingRecords.append(contentsOf: nodeIndexingRecords) + } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled { + let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false) + + supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) } } - case .none: - break } - if emitDigest { - let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true) - let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier) - - resultsGroup.async(queue: resultsSyncQueue) { - assets.merge(renderNode.assetReferences, uniquingKeysWith: +) - linkSummaries.append(contentsOf: nodeLinkSummaries) - indexingRecords.append(contentsOf: nodeIndexingRecords) - } - } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled { - let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false) - - resultsGroup.async(queue: resultsSyncQueue) { - linkSummaries.append(contentsOf: nodeLinkSummaries) - } - } - } catch { - recordProblem(from: error, in: &results, withIdentifier: "render-node") + return supplementaryRenderInfo } } + + var aggregateSupplementaryRenderInfo = SupplementaryRenderInformation() + + for try await partialInfo in taskGroup { + aggregateSupplementaryRenderInfo.assets.merge(partialInfo.assets, uniquingKeysWith: +) + aggregateSupplementaryRenderInfo.linkSummaries.append(contentsOf: partialInfo.linkSummaries) + aggregateSupplementaryRenderInfo.indexingRecords.append(contentsOf: partialInfo.indexingRecords) + aggregateSupplementaryRenderInfo.coverageInfo.append(contentsOf: partialInfo.coverageInfo) + } + + return aggregateSupplementaryRenderInfo } - // Wait for any concurrent updates to complete. - resultsGroup.wait() - signposter.endInterval("Render", renderSignpostHandle) - guard !Task.isCancelled else { return [] } + guard !Task.isCancelled else { return } // Write various metadata if emitDigest { - signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) { - do { - try outputConsumer.consume(linkableElementSummaries: linkSummaries) - try outputConsumer.consume(indexingRecords: indexingRecords) - try outputConsumer.consume(assets: assets) - } catch { - recordProblem(from: error, in: &conversionProblems, withIdentifier: "metadata") - } + try signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) { + try outputConsumer.consume(linkableElementSummaries: supplementaryRenderInfo.linkSummaries) + try outputConsumer.consume(indexingRecords: supplementaryRenderInfo.indexingRecords) + try outputConsumer.consume(assets: supplementaryRenderInfo.assets) } } if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled { - signposter.withIntervalSignpost("Serialize link hierarchy", id: signposter.makeSignpostID()) { - do { - let serializableLinkInformation = try context.linkResolver.localResolver.prepareForSerialization(bundleID: context.inputs.id) - try outputConsumer.consume(linkResolutionInformation: serializableLinkInformation) - - if !emitDigest { - try outputConsumer.consume(linkableElementSummaries: linkSummaries) - } - } catch { - recordProblem(from: error, in: &conversionProblems, withIdentifier: "link-resolver") + try signposter.withIntervalSignpost("Serialize link hierarchy", id: signposter.makeSignpostID()) { + let serializableLinkInformation = try context.linkResolver.localResolver.prepareForSerialization(bundleID: context.inputs.id) + try outputConsumer.consume(linkResolutionInformation: serializableLinkInformation) + + if !emitDigest { + try outputConsumer.consume(linkableElementSummaries: supplementaryRenderInfo.linkSummaries) } } } if emitDigest { - signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) { - do { - try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems + conversionProblems) - } catch { - recordProblem(from: error, in: &conversionProblems, withIdentifier: "problems") - } + try signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) { + try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems) } } switch documentationCoverageOptions.level { case .detailed, .brief: - do { - try outputConsumer.consume(documentationCoverageInfo: coverageInfo) - } catch { - recordProblem(from: error, in: &conversionProblems, withIdentifier: "coverage") - } + try outputConsumer.consume(documentationCoverageInfo: supplementaryRenderInfo.coverageInfo) case .none: break } @@ -232,7 +202,12 @@ package enum ConvertActionConverter { benchmark(add: Benchmark.ExternalTopicsHash(context: context)) // Log the peak memory. benchmark(add: Benchmark.PeakMemory()) - - return conversionProblems } } + +private struct SupplementaryRenderInformation { + var indexingRecords = [IndexingRecord]() + var linkSummaries = [LinkDestinationSummary]() + var assets = [RenderReferenceType : [any RenderReference]]() + var coverageInfo = [CoverageDataEntry]() +} diff --git a/Sources/SwiftDocCUtilities/Action/Actions/Convert/ConvertAction.swift b/Sources/SwiftDocCUtilities/Action/Actions/Convert/ConvertAction.swift index b6c98ecedd..9a92c40890 100644 --- a/Sources/SwiftDocCUtilities/Action/Actions/Convert/ConvertAction.swift +++ b/Sources/SwiftDocCUtilities/Action/Actions/Convert/ConvertAction.swift @@ -313,19 +313,16 @@ public struct ConvertAction: AsyncAction { } } - let analysisProblems: [Problem] - let conversionProblems: [Problem] do { - conversionProblems = try signposter.withIntervalSignpost("Process") { - try ConvertActionConverter.convert( - context: context, - outputConsumer: outputConsumer, - sourceRepository: sourceRepository, - emitDigest: emitDigest, - documentationCoverageOptions: documentationCoverageOptions - ) - } - analysisProblems = context.problems + let processInterval = signposter.beginInterval("Process", id: signposter.makeSignpostID()) + try await ConvertActionConverter.convert( + context: context, + outputConsumer: outputConsumer, + sourceRepository: sourceRepository, + emitDigest: emitDigest, + documentationCoverageOptions: documentationCoverageOptions + ) + signposter.endInterval("Process", processInterval) } catch { if emitDigest { let problem = Problem(description: (error as? (any DescribedError))?.errorDescription ?? error.localizedDescription, source: nil) @@ -335,7 +332,7 @@ public struct ConvertAction: AsyncAction { throw error } - var didEncounterError = analysisProblems.containsErrors || conversionProblems.containsErrors + var didEncounterError = context.problems.containsErrors let hasTutorial = context.knownPages.contains(where: { guard let kind = try? context.entity(with: $0).kind else { return false } return kind == .tutorial || kind == .tutorialArticle diff --git a/Tests/SwiftDocCTests/DeprecatedDiagnosticsDigestWarningTests.swift b/Tests/SwiftDocCTests/DeprecatedDiagnosticsDigestWarningTests.swift index 8cfbad65db..df4ace7e7f 100644 --- a/Tests/SwiftDocCTests/DeprecatedDiagnosticsDigestWarningTests.swift +++ b/Tests/SwiftDocCTests/DeprecatedDiagnosticsDigestWarningTests.swift @@ -13,7 +13,7 @@ import SwiftDocC import SwiftDocCTestUtilities import XCTest -// THIS SHOULD BE REMOVED, RIGHT?! +// FIXME: Remove this after 6.3 is released class DeprecatedDiagnosticsDigestWarningTests: XCTestCase { func testNoDeprecationWarningWhenThereAreNoOtherWarnings() async throws { let catalog = Folder(name: "unit-test.docc", content: [ @@ -26,7 +26,7 @@ class DeprecatedDiagnosticsDigestWarningTests: XCTestCase { let (_, context) = try await loadBundle(catalog: catalog) let outputConsumer = TestOutputConsumer() - _ = try ConvertActionConverter.convert( + try await ConvertActionConverter.convert( context: context, outputConsumer: outputConsumer, sourceRepository: nil, @@ -50,7 +50,7 @@ class DeprecatedDiagnosticsDigestWarningTests: XCTestCase { let (_, context) = try await loadBundle(catalog: catalog) let outputConsumer = TestOutputConsumer() - _ = try ConvertActionConverter.convert( + try await ConvertActionConverter.convert( context: context, outputConsumer: outputConsumer, sourceRepository: nil, diff --git a/Tests/SwiftDocCTests/TestRenderNodeOutputConsumer.swift b/Tests/SwiftDocCTests/TestRenderNodeOutputConsumer.swift index c9eca9a9e9..566c549eb6 100644 --- a/Tests/SwiftDocCTests/TestRenderNodeOutputConsumer.swift +++ b/Tests/SwiftDocCTests/TestRenderNodeOutputConsumer.swift @@ -95,7 +95,7 @@ extension XCTestCase { ) let outputConsumer = TestRenderNodeOutputConsumer() - _ = try ConvertActionConverter.convert( + try await ConvertActionConverter.convert( context: context, outputConsumer: outputConsumer, sourceRepository: sourceRepository, diff --git a/Tests/SwiftDocCUtilitiesTests/MergeActionTests.swift b/Tests/SwiftDocCUtilitiesTests/MergeActionTests.swift index 88acc2a5c2..fcbbc12e16 100644 --- a/Tests/SwiftDocCUtilitiesTests/MergeActionTests.swift +++ b/Tests/SwiftDocCUtilitiesTests/MergeActionTests.swift @@ -940,8 +940,7 @@ class MergeActionTests: XCTestCase { let outputConsumer = ConvertFileWritingConsumer(targetFolder: outputPath, bundleRootFolder: catalogDir, fileManager: fileSystem, context: context, indexer: indexer, transformForStaticHostingIndexHTML: nil, bundleID: inputs.id) - let convertProblems = try ConvertActionConverter.convert(context: context, outputConsumer: outputConsumer, sourceRepository: nil, emitDigest: false, documentationCoverageOptions: .noCoverage) - XCTAssert(convertProblems.isEmpty, "Unexpected problems: \(context.problems.map(\.diagnostic.summary).joined(separator: "\n"))", file: file, line: line) + try await ConvertActionConverter.convert(context: context, outputConsumer: outputConsumer, sourceRepository: nil, emitDigest: false, documentationCoverageOptions: .noCoverage) let navigatorProblems = indexer.finalize(emitJSON: true, emitLMDB: false) XCTAssert(navigatorProblems.isEmpty, "Unexpected problems: \(context.problems.map(\.diagnostic.summary).joined(separator: "\n"))", file: file, line: line) From c0f4e167bf1b12ea4d34d42c53329c5571f19088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20R=C3=B6nnqvist?= Date: Fri, 14 Nov 2025 13:58:36 +0100 Subject: [PATCH 3/5] Divide rendering into smaller tasks, but run fewer parallel tasks at the same time --- .../ConvertActionConverter.swift | 106 +++++++++++------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift index 14a0f29b44..9140958927 100644 --- a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift +++ b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift @@ -88,58 +88,68 @@ package enum ConvertActionConverter { // Iterate over all the known pages in chunks var remaining = context.knownPages[...] - let numberOfBatches = ProcessInfo.processInfo.processorCount * 4 - let numberOfElementsPerTask = Int(Double(remaining.count) / Double(numberOfBatches) + 1) + // Don't run more tasks in parallel than there are cores to run them + let maxParallelTasks: Int = ProcessInfo.processInfo.processorCount + let numberOfElementsPerTask = max( + Int(Double(remaining.count) / Double(maxParallelTasks * 10) + 1), + 25 // An arbitrary smallest task size to avoid some concurrency overhead when there aren't that many pages is too small. + ) - while !remaining.isEmpty { - let slice = remaining.prefix(numberOfElementsPerTask) - remaining = remaining.dropFirst(numberOfElementsPerTask) + func _render(referencesIn slice: consuming ArraySlice) throws -> SupplementaryRenderInformation { + var supplementaryRenderInfo = SupplementaryRenderInformation() - // Start work of one slice of the known pages - taskGroup.addTask { - var supplementaryRenderInfo = SupplementaryRenderInformation() - - for identifier in slice { - try autoreleasepool { - let entity = try context.entity(with: identifier) + for identifier in slice { + try autoreleasepool { + let entity = try context.entity(with: identifier) - guard let renderNode = converter.renderNode(for: entity) else { - // No render node was produced for this entity, so just skip it. - return - } - - try outputConsumer.consume(renderNode: renderNode) + guard let renderNode = converter.renderNode(for: entity) else { + // No render node was produced for this entity, so just skip it. + return + } + + try outputConsumer.consume(renderNode: renderNode) - switch documentationCoverageOptions.level { - case .detailed, .brief: - let coverageEntry = try CoverageDataEntry( - documentationNode: entity, - renderNode: renderNode, - context: context - ) - if coverageFilterClosure(coverageEntry) { - supplementaryRenderInfo.coverageInfo.append(coverageEntry) - } - case .none: - break + switch documentationCoverageOptions.level { + case .detailed, .brief: + let coverageEntry = try CoverageDataEntry( + documentationNode: entity, + renderNode: renderNode, + context: context + ) + if coverageFilterClosure(coverageEntry) { + supplementaryRenderInfo.coverageInfo.append(coverageEntry) } + case .none: + break + } + + if emitDigest { + let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true) + let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier) - if emitDigest { - let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true) - let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier) - - supplementaryRenderInfo.assets.merge(renderNode.assetReferences, uniquingKeysWith: +) - supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) - supplementaryRenderInfo.indexingRecords.append(contentsOf: nodeIndexingRecords) - } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled { - let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false) - - supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) - } + supplementaryRenderInfo.assets.merge(renderNode.assetReferences, uniquingKeysWith: +) + supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) + supplementaryRenderInfo.indexingRecords.append(contentsOf: nodeIndexingRecords) + } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled { + let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false) + + supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries) } } + } + + return supplementaryRenderInfo + } + + for _ in 0.. Date: Fri, 14 Nov 2025 15:15:36 +0100 Subject: [PATCH 4/5] Extract code for performing concurrent work on slices of a collection --- .../ConvertActionConverter.swift | 65 +++----------- .../Collection+ConcurrentPerform.swift | 88 +++++++++++++++++++ 2 files changed, 101 insertions(+), 52 deletions(-) diff --git a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift index 9140958927..ee56f7b7a3 100644 --- a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift +++ b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift @@ -83,19 +83,10 @@ package enum ConvertActionConverter { let renderSignpostHandle = signposter.beginInterval("Render", id: signposter.makeSignpostID(), "Render \(context.knownPages.count) pages") // Render all pages and gather their supplementary "digest" information if enabled. - let supplementaryRenderInfo = try await withThrowingTaskGroup(of: SupplementaryRenderInformation.self) { taskGroup in - let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure() - // Iterate over all the known pages in chunks - var remaining = context.knownPages[...] - - // Don't run more tasks in parallel than there are cores to run them - let maxParallelTasks: Int = ProcessInfo.processInfo.processorCount - let numberOfElementsPerTask = max( - Int(Double(remaining.count) / Double(maxParallelTasks * 10) + 1), - 25 // An arbitrary smallest task size to avoid some concurrency overhead when there aren't that many pages is too small. - ) - - func _render(referencesIn slice: consuming ArraySlice) throws -> SupplementaryRenderInformation { + let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure() + let supplementaryRenderInfo = try await context.knownPages._concurrentPerform( + taskName: "Render", + batchWork: { slice in var supplementaryRenderInfo = SupplementaryRenderInformation() for identifier in slice { @@ -111,11 +102,7 @@ package enum ConvertActionConverter { switch documentationCoverageOptions.level { case .detailed, .brief: - let coverageEntry = try CoverageDataEntry( - documentationNode: entity, - renderNode: renderNode, - context: context - ) + let coverageEntry = try CoverageDataEntry(documentationNode: entity, renderNode: renderNode, context: context) if coverageFilterClosure(coverageEntry) { supplementaryRenderInfo.coverageInfo.append(coverageEntry) } @@ -139,41 +126,15 @@ package enum ConvertActionConverter { } return supplementaryRenderInfo + }, + initialResult: SupplementaryRenderInformation(), + combineResults: { accumulated, partialResult in + accumulated.assets.merge(partialResult.assets, uniquingKeysWith: +) + accumulated.linkSummaries.append(contentsOf: partialResult.linkSummaries) + accumulated.indexingRecords.append(contentsOf: partialResult.indexingRecords) + accumulated.coverageInfo.append(contentsOf: partialResult.coverageInfo) } - - for _ in 0..( + taskName: String? = nil, + batchWork: (consuming SubSequence) throws -> PartialResult, + initialResult: Result, + combineResults: (inout Result, consuming PartialResult) -> Void + ) async throws -> Result { + try await withThrowingTaskGroup(of: PartialResult.self, returning: Result.self) { taskGroup in + try await withoutActuallyEscaping(batchWork) { work in + try await withoutActuallyEscaping(combineResults) { combineResults in + var remaining = self[...] + + // Don't run more tasks in parallel than there are cores to run them + let maxParallelTasks: Int = ProcessInfo.processInfo.processorCount + // Finding the right number of tasks is a balancing act. + // If the tasks are too small, then there's increased overhead from scheduling a lot of tasks and accumulating their results. + // If the tasks are too large, then there's a risk that some tasks take longer to complete than others, increasing the amount of idle time. + // + // Here, we aim to schedule at most 10 tasks per core but create fewer tasks if the collection is fairly small to avoid some concurrent overhead. + // The table below shows the approximate number of tasks per CPU core and the number of elements per task, within parenthesis, + // for different collection sizes and number of CPU cores, given a minimum task size of 20 elements: + // + // | 500 | 1000 | 2500 | 5000 | 10000 | 25000 + // ----------|------------|------------|------------|------------|-------------|------------- + // 8 cores | ~3,2 (20) | ~6,3 (20) | ~9,8 (32) | ~9,9 (63) | ~9,9 (126) | ~9,9 (313) + // 12 cores | ~2,1 (20) | ~4,2 (20) | ~10,0 (21) | ~10,0 (42) | ~10,0 (84) | ~10,0 (209) + // 16 cores | ~1,6 (20) | ~3,2 (20) | ~7,9 (20) | ~9,8 (32) | ~9,9 (63) | ~10,0 (157) + // 32 cores | ~0,8 (20) | ~1,6 (20) | ~4,0 (20) | ~7,9 (20) | ~9,8 (32) | ~9,9 (79) + // + let numberOfElementsPerTask: Int = Swift.max( + Int(Double(remaining.count) / Double(maxParallelTasks * 10) + 1), + 20 // (this is a completely arbitrary task size threshold) + ) + + // Start the first round of work. + // If the collection is big, this will add one task per core. + // If the collection is small, this will only add a few tasks. + for _ in 0.. Date: Fri, 14 Nov 2025 17:29:48 +0100 Subject: [PATCH 5/5] Only use named tasks with Swift 6.2 compilers --- .../Utility/Collection+ConcurrentPerform.swift | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift b/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift index b02a0db83e..bec134687d 100644 --- a/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift +++ b/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift @@ -199,9 +199,15 @@ extension Collection { remaining = remaining.dropFirst(numberOfElementsPerTask) // Start work of one slice of the known pages + #if compiler(<6.2) + taskGroup.addTask { + return try work(slice) + } + #else taskGroup.addTask(name: taskName) { return try work(slice) } + #endif } } @@ -220,9 +226,15 @@ extension Collection { remaining = remaining.dropFirst(numberOfElementsPerTask) // Start work of one slice of the known pages + #if compiler(<6.2) + taskGroup.addTask { + return try work(slice) + } + #else taskGroup.addTask(name: taskName) { return try work(slice) } + #endif } }