Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 135 additions & 68 deletions src/build/adapter_filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,107 +3,170 @@
#include <sourcemeta/core/io.h>

#include <cassert> // assert
#include <chrono> // std::chrono::nanoseconds, std::chrono::duration_cast
#include <cstdint> // std::int64_t
#include <fstream> // std::ofstream, std::ifstream
#include <mutex> // std::unique_lock
#include <string> // std::string
#include <string_view> // std::string_view

namespace sourcemeta::one {
static constexpr std::string_view DEPENDENCIES_FILE{"deps.txt"};

constexpr std::string_view DEPENDENCIES_EXTENSION{".deps"};
namespace sourcemeta::one {

BuildAdapterFilesystem::BuildAdapterFilesystem(
const std::filesystem::path &output_root)
: root{std::filesystem::canonical(output_root)} {}

auto BuildAdapterFilesystem::dependencies_path(const node_type &path) const
-> node_type {
assert(path.is_absolute());
return path.string() + std::string{DEPENDENCIES_EXTENSION};
}

auto BuildAdapterFilesystem::read_dependencies(const node_type &path) const
-> std::optional<BuildDependencies<node_type>> {
assert(path.is_absolute());
const auto dependencies_path{this->dependencies_path(path)};

std::ifstream stream{dependencies_path};
if (!stream.is_open()) {
return std::nullopt;
: root{std::filesystem::canonical(output_root)} {
const auto deps_path{this->root / DEPENDENCIES_FILE};
if (!std::filesystem::exists(deps_path)) {
return;
}

std::string contents{std::istreambuf_iterator<char>(stream),
std::istreambuf_iterator<char>()};

BuildDependencies<node_type> result;
std::size_t position{0};
while (position < contents.size()) {
auto newline{contents.find('\n', position)};
if (newline == std::string::npos) {
newline = contents.size();
try {
std::ifstream stream{deps_path};
if (!stream.is_open()) {
return;
}

auto end{newline};
// Prevent CRLF on Windows
if (end > position && contents[end - 1] == '\r') {
end -= 1;
}
std::string contents{std::istreambuf_iterator<char>(stream),
std::istreambuf_iterator<char>()};

if (end > position) {
auto kind{BuildDependencyKind::Static};
std::filesystem::path dependency;
const auto length{end - position};
if (length >= 2 && contents[position + 1] == ' ' &&
(contents[position] == 's' || contents[position] == 'd')) {
kind = (contents[position] == 'd') ? BuildDependencyKind::Dynamic
: BuildDependencyKind::Static;
dependency = contents.substr(position + 2, end - position - 2);
} else {
dependency = contents.substr(position, length);
std::string current_key;
BuildDependencies<node_type> current_deps;
std::size_t position{0};

while (position < contents.size()) {
auto newline{contents.find('\n', position)};
if (newline == std::string::npos) {
newline = contents.size();
}

if (newline <= position + 2 || contents[position + 1] != ' ') {
position = newline + 1;
continue;
}
if (!dependency.is_absolute()) {
dependency = (this->root / dependency).lexically_normal();

const char tag{contents[position]};
const std::string_view value{contents.data() + position + 2,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parsing deps.txt lines via value doesn’t strip a trailing \r, so CRLF files can end up with keys/paths that include \r and won’t match later lookups. This is especially likely on Windows where text-mode writes typically produce CRLF.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

newline - position - 2};

switch (tag) {
case 't':
if (!current_key.empty()) {
this->dependencies_map.insert_or_assign(current_key,
std::move(current_deps));
current_deps = {};
}

current_key = value;
break;
case 's':
current_deps.emplace_back(
BuildDependencyKind::Static,
(this->root / std::string{value}).lexically_normal());
break;
case 'd':
current_deps.emplace_back(
BuildDependencyKind::Dynamic,
(this->root / std::string{value}).lexically_normal());
break;
case 'm': {
const auto space{value.find(' ')};
if (space != std::string_view::npos) {
const auto path_part{value.substr(0, space)};
const auto ns_part{value.substr(space + 1)};
const std::chrono::nanoseconds nanoseconds{
std::stoll(std::string{ns_part})};
const auto mark_value{mark_type{
std::chrono::duration_cast<mark_type::duration>(nanoseconds)}};
this->marks.insert_or_assign(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marks loaded from deps.txt are cached even if the corresponding output file no longer exists; later mark() can return this cached value and build() may skip regenerating a missing destination. That can cascade into failures like Output::track() asserting the file exists even though it was deleted between runs.

Severity: high

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

(this->root / std::string{path_part}).lexically_normal(),
mark_value);
}

break;
}
default:
break;
}

result.emplace_back(kind, std::move(dependency));
position = newline + 1;
}

position = newline + 1;
if (!current_key.empty()) {
this->dependencies_map.insert_or_assign(current_key,
std::move(current_deps));
}
this->has_previous_run = true;
} catch (...) {
this->dependencies_map.clear();
this->marks.clear();
}
}

if (result.empty()) {
auto BuildAdapterFilesystem::read_dependencies(const node_type &path) const
-> std::optional<BuildDependencies<node_type>> {
assert(path.is_absolute());
const auto key{path.lexically_relative(this->root).string()};
std::shared_lock lock{this->dependencies_mutex};
const auto match{this->dependencies_map.find(key)};
if (match == this->dependencies_map.end() || match->second.empty()) {
return std::nullopt;
} else {
return result;
}

return match->second;
}

auto BuildAdapterFilesystem::write_dependencies(
const node_type &path, const BuildDependencies<node_type> &dependencies)
-> void {
assert(path.is_absolute());
assert(std::filesystem::exists(path));
// Try to make sure as much as we can that any write operation made to disk
sourcemeta::core::flush(path);
this->refresh(path);
const auto dependencies_path{this->dependencies_path(path)};
std::filesystem::create_directories(dependencies_path.parent_path());
std::ofstream dependencies_stream{dependencies_path};
assert(!dependencies_stream.fail());
for (const auto &dependency : dependencies) {
const auto prefix{dependency.first == BuildDependencyKind::Dynamic ? "d "
: "s "};
const auto relative{dependency.second.lexically_relative(this->root)};
const auto key{path.lexically_relative(this->root).string()};
std::unique_lock lock{this->dependencies_mutex};
this->dependencies_map.insert_or_assign(key, dependencies);
}

auto BuildAdapterFilesystem::flush_dependencies(
const std::function<bool(const node_type &)> &filter) -> void {
const auto deps_path{this->root / DEPENDENCIES_FILE};
std::ofstream stream{deps_path};
assert(!stream.fail());

for (const auto &entry : this->dependencies_map) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flush_dependencies() iterates dependencies_map/marks without taking the associated locks; if any thread is still calling write_dependencies()/refresh(), this becomes a data race/UB. If the method is intended to be thread-safe, it likely needs shared locking (or an explicit contract that all work is joined first).

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

if (!filter(this->root / entry.first)) {
continue;
}

stream << "t " << entry.first << '\n';
for (const auto &dependency : entry.second) {
const char kind_char{
dependency.first == BuildDependencyKind::Dynamic ? 'd' : 's'};
const auto relative{dependency.second.lexically_relative(this->root)};
if (!relative.empty() && *relative.begin() != "..") {
stream << kind_char << ' ' << relative.string() << '\n';
} else {
stream << kind_char << ' ' << dependency.second.string() << '\n';
}
}
}

for (const auto &entry : this->marks) {
const auto relative{entry.first.lexically_relative(this->root)};
if (!relative.empty() && *relative.begin() != "..") {
dependencies_stream << prefix << relative.string() << "\n";
} else {
dependencies_stream << prefix << dependency.second.string() << "\n";
const auto nanoseconds{
std::chrono::duration_cast<std::chrono::nanoseconds>(
entry.second.time_since_epoch())
.count()};
stream << "m " << relative.string() << ' '
<< static_cast<std::int64_t>(nanoseconds) << '\n';
}
}

dependencies_stream.flush();
dependencies_stream.close();
sourcemeta::core::flush(dependencies_path);
stream.flush();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deps.txt is only stream.flush()’d/closed, but it’s no longer passed through sourcemeta::core::flush like the previous per-target .deps writes were. If incremental rebuild correctness relies on this file surviving crashes/power loss, it may need the same durability treatment.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

stream.close();
}

auto BuildAdapterFilesystem::refresh(const node_type &path) -> void {
Expand All @@ -129,11 +192,15 @@ auto BuildAdapterFilesystem::mark(const node_type &path)
}
}

// Output files should always have their marks cached
// Only input files or new output files are not
assert(!this->has_previous_run ||
!path.string().starts_with(this->root.string()) ||
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path.string().starts_with(this->root.string()) check can misclassify paths that merely share a prefix (e.g., /out vs /out2) and can be brittle with normalization/case differences. That could make this debug assertion fire for non-output files or miss real output-root paths.

Severity: low

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

!std::filesystem::exists(path));

try {
const auto value{std::filesystem::last_write_time(path)};
// Within a single run, if we didn't build this file, its mtime won't
// change. If we did build it, refreshing already set a synthetic timestamp
// that the cache lookup above would have returned instead
// Cache for the rest of this run since input files don't change
std::unique_lock lock{this->mutex};
this->marks.emplace(path, value);
return value;
Expand Down
10 changes: 8 additions & 2 deletions src/build/include/sourcemeta/one/build_adapter_filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
// NOLINTEND(misc-include-cleaner)

#include <filesystem> // std::filesystem
#include <functional> // std::function
#include <optional> // std::optional
#include <shared_mutex> // std::shared_mutex
#include <string> // std::string
#include <unordered_map> // std::unordered_map

namespace sourcemeta::one {
Expand All @@ -23,13 +25,13 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildAdapterFilesystem {

BuildAdapterFilesystem(const std::filesystem::path &output_root);

[[nodiscard]] auto dependencies_path(const node_type &path) const
-> node_type;
[[nodiscard]] auto read_dependencies(const node_type &path) const
-> std::optional<BuildDependencies<node_type>>;
auto write_dependencies(const node_type &path,
const BuildDependencies<node_type> &dependencies)
-> void;
auto flush_dependencies(const std::function<bool(const node_type &)> &filter)
-> void;
auto refresh(const node_type &path) -> void;
[[nodiscard]] auto mark(const node_type &path) -> std::optional<mark_type>;
[[nodiscard]] auto is_newer_than(const mark_type left,
Expand All @@ -39,6 +41,10 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildAdapterFilesystem {
std::filesystem::path root;
std::unordered_map<node_type, mark_type> marks;
std::shared_mutex mutex;
std::unordered_map<std::string, BuildDependencies<node_type>>
dependencies_map;
mutable std::shared_mutex dependencies_mutex;
bool has_previous_run{false};
};

} // namespace sourcemeta::one
Expand Down
40 changes: 25 additions & 15 deletions src/index/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ DISPATCH(const std::filesystem::path &destination,

// We need to mark files regardless of whether they were generated or not
output.track(destination);
output.track(destination.string() + ".deps");
return was_built;
}

Expand Down Expand Up @@ -193,12 +192,17 @@ static auto index_main(const std::string_view &program,
// (6) Store a mark of the One version for target dependencies
/////////////////////////////////////////////////////////////////////////////

sourcemeta::one::BuildAdapterFilesystem adapter{output.path()};

// We do this so that targets can be re-built if the One version changes
const auto mark_version_path{output.path() / "version.json"};
// Note we only write back if the content changed in order to not accidentally
// bump up the file modified time
output.write_json_if_different(
mark_version_path, sourcemeta::core::JSON{sourcemeta::one::version()});
if (output.write_json_if_different(
mark_version_path,
sourcemeta::core::JSON{sourcemeta::one::version()})) {
adapter.refresh(mark_version_path);
};

/////////////////////////////////////////////////////////////////////////////
// (7) Store the full configuration file for target dependencies
Expand All @@ -209,17 +213,21 @@ static auto index_main(const std::string_view &program,
const auto mark_configuration_path{output.path() / "configuration.json"};
// Note we only write back if the content changed in order to not accidentally
// bump up the file modified time
output.write_json_if_different(mark_configuration_path, raw_configuration);
if (output.write_json_if_different(mark_configuration_path,
raw_configuration)) {
adapter.refresh(mark_configuration_path);
}

/////////////////////////////////////////////////////////////////////////////
// (8) Store the optional comment for informational purposes
/////////////////////////////////////////////////////////////////////////////

if (app.contains("comment")) {
const auto comment_path{output.path() / "comment.json"};
output.write_json_if_different(
comment_path,
sourcemeta::core::JSON{std::string{app.at("comment").at(0)}});
const auto comment_path{output.path() / "comment.json"};
if (app.contains("comment") &&
output.write_json_if_different(
comment_path,
sourcemeta::core::JSON{std::string{app.at("comment").at(0)}})) {
adapter.refresh(comment_path);
}

PROFILE_END(profiling, "Startup");
Expand Down Expand Up @@ -309,7 +317,6 @@ static auto index_main(const std::string_view &program,
const auto schemas_path{output.path() / "schemas"};
const auto display_schemas_path{
std::filesystem::relative(schemas_path, output.path())};
sourcemeta::one::BuildAdapterFilesystem adapter{output.path()};
sourcemeta::core::parallel_for_each(
resolver.begin(), resolver.end(),
[&output, &schemas_path, &resolver, &mutex, &adapter,
Expand All @@ -322,8 +329,6 @@ static auto index_main(const std::string_view &program,
DISPATCH<sourcemeta::one::GENERATE_MATERIALISED_SCHEMA>(
destination,
{sourcemeta::one::make_dependency(schema.second.path),
// This target depends on the configuration file given things like
// resolve maps and base URIs
sourcemeta::one::make_dependency(mark_configuration_path),
sourcemeta::one::make_dependency(mark_version_path)},
{schema.first, resolver}, mutex, "Ingesting", schema.first,
Expand Down Expand Up @@ -573,15 +578,14 @@ static auto index_main(const std::string_view &program,
for (const auto &schema : resolver) {
auto dependents_path{schemas_path / schema.second.relative_path / SENTINEL /
"dependents.metapack"};
const auto dependents_deps_path{dependents_path.string() + ".deps"};
if (affected_dependents.contains(schema.first) ||
!std::filesystem::exists(dependents_path) ||
!std::filesystem::exists(dependents_deps_path)) {
// TODO: This is potentially pretty slow?
!adapter.read_dependencies(dependents_path).has_value()) {
rework_entries.push_back(
{std::cref(schema.first), std::move(dependents_path)});
} else {
output.track(dependents_path);
output.track(dependents_path.string() + ".deps");
}
}

Expand Down Expand Up @@ -792,6 +796,12 @@ static auto index_main(const std::string_view &program,

// TODO: Print the size of the output directory here

// TODO: This level of coupling means that the output and the adapter should
// be one
adapter.flush_dependencies([&output](const auto &target) {
return !output.is_untracked_file(target);
});
output.track(output.path() / "deps.txt");
output.remove_unknown_files();

PROFILE_END(profiling, "Cleanup");
Expand Down
Loading