diff --git a/lib/docker_manager/pitchfork_adapter.rb b/lib/docker_manager/pitchfork_adapter.rb new file mode 100644 index 0000000..13c7189 --- /dev/null +++ b/lib/docker_manager/pitchfork_adapter.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module DockerManager + class PitchforkAdapter < WebServerAdapter + def server_name + "Pitchfork" + end + + def launcher_pid + `pgrep -f unicorn_launcher`.strip.to_i + end + + def master_pid + `pgrep -f "pitchfork monitor"`.strip.to_i + end + end +end diff --git a/lib/docker_manager/unicorn_adapter.rb b/lib/docker_manager/unicorn_adapter.rb new file mode 100644 index 0000000..4c9da52 --- /dev/null +++ b/lib/docker_manager/unicorn_adapter.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module DockerManager + class UnicornAdapter < WebServerAdapter + def server_name + "Unicorn" + end + + def launcher_pid + `pgrep -f unicorn_launcher`.strip.to_i + end + + def master_pid + `pgrep -f "unicorn master -E"`.strip.to_i + end + end +end diff --git a/lib/docker_manager/upgrader.rb b/lib/docker_manager/upgrader.rb index f57040b..06c0c69 100644 --- a/lib/docker_manager/upgrader.rb +++ b/lib/docker_manager/upgrader.rb @@ -1,11 +1,20 @@ # frozen_string_literal: true +require_relative "web_server_adapter" +require_relative "unicorn_adapter" +require_relative "pitchfork_adapter" + class DockerManager::Upgrader + attr_reader :web_server + + delegate :min_workers, :server_name, :launcher_pid, :master_pid, :workers, to: :web_server + def initialize(user_id, repos, from_version) @user_id = user_id @user = User.find(user_id) @repos = repos.is_a?(Array) ? repos : [repos] @from_version = from_version + @web_server = web_server_adapter end def reset! @@ -15,10 +24,6 @@ def reset! status(nil) end - def min_workers - 1 - end - def upgrade return if @repos.any? { |repo| !repo.start_upgrading } @@ -31,40 +36,28 @@ def upgrade log("*** Please be patient, next steps might take a while ***") log("********************************************************") - launcher_pid = unicorn_launcher_pid - master_pid = unicorn_master_pid - workers = unicorn_workers(master_pid).size - - if workers < 2 - log("ABORTING, you do not have enough unicorn workers running") + if workers.size <= min_workers + log("ABORTING, you do not have enough #{server_name} workers running") raise "Not enough workers" end if launcher_pid <= 0 || master_pid <= 0 - log("ABORTING, missing unicorn launcher or unicorn master") - raise "No unicorn master or launcher" + log("ABORTING, missing #{server_name} launcher or master/monitor") + raise "No #{server_name} master or launcher" end percent(5) - log("Cycling Unicorn, to free up memory") - reload_unicorn(launcher_pid) + log("Cycling #{server_name}, to free up memory") + web_server.reload percent(10) reloaded = false - num_workers_spun_down = workers - min_workers + num_workers_spun_down = workers.size - min_workers if num_workers_spun_down.positive? - log "Stopping #{workers - min_workers} Unicorn worker(s), to free up memory" - num_workers_spun_down.times { Process.kill("TTOU", unicorn_master_pid) } - end - - if ENV["UNICORN_SIDEKIQS"].to_i > 0 - log "Stopping job queue to reclaim memory, master pid is #{master_pid}" - Process.kill("TSTP", unicorn_master_pid) - sleep 1 - # older versions do not have support, so quickly send a cont so master process is not hung - Process.kill("CONT", unicorn_master_pid) + log "Stopping #{num_workers_spun_down} #{server_name} worker(s), to free up memory" + web_server.scale_down_workers(num_workers_spun_down) end # HEAD@{upstream} is just a fancy way how to say origin/main (in normal case) @@ -117,7 +110,7 @@ def upgrade run("bundle exec rake s3:upload_assets") if using_s3_assets percent(80) - reload_unicorn(launcher_pid) + web_server.reload reloaded = true # Flush nginx cache here - this is not critical, and the rake task may not exist yet - ignore failures here. @@ -147,13 +140,14 @@ def upgrade end if num_workers_spun_down.to_i.positive? && !reloaded - log "Spinning up #{num_workers_spun_down} Unicorn worker(s) that were stopped initially" - num_workers_spun_down.times { Process.kill("TTIN", unicorn_master_pid) } + log "Spinning up #{num_workers_spun_down} #{server_name} worker(s) that were stopped initially" + web_server.scale_up_workers(num_workers_spun_down) end raise ex ensure @repos.each(&:stop_upgrading) + web_server.clear_restart_flag end def publish(type, value) @@ -269,47 +263,11 @@ def log_version_upgrade private - def pid_exists?(pid) - Process.getpgid(pid) - rescue Errno::ESRCH - false - end - - def unicorn_launcher_pid - `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'`.strip.to_i - end - - def unicorn_master_pid - `ps aux | grep "unicorn master -E" | grep -v "grep" | awk '{print $2}'`.strip.to_i - end - - def unicorn_workers(master_pid) - `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) - end - - def local_web_url - "http://127.0.0.1:#{ENV["UNICORN_PORT"] || 3000}/srv/status" - end - - def reload_unicorn(launcher_pid) - log("Restarting unicorn pid: #{launcher_pid}") - original_master_pid = unicorn_master_pid - Process.kill("USR2", launcher_pid) - - iterations = 0 - while pid_exists?(original_master_pid) - iterations += 1 - break if iterations >= 60 - log("Waiting for Unicorn to reload#{"." * iterations}") - sleep 2 - end - - iterations = 0 - while `curl -s #{local_web_url}` != "ok" - iterations += 1 - break if iterations >= 60 - log("Waiting for Unicorn workers to start up#{"." * iterations}") - sleep 2 - end + def web_server_adapter + if `pgrep -f '^unicorn[^_]'`.present? + DockerManager::UnicornAdapter + else + DockerManager::PitchforkAdapter + end.new(self) end end diff --git a/lib/docker_manager/web_server_adapter.rb b/lib/docker_manager/web_server_adapter.rb new file mode 100644 index 0000000..b54023f --- /dev/null +++ b/lib/docker_manager/web_server_adapter.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module DockerManager + class WebServerAdapter + RESTART_FLAG_KEY = "docker_manager:upgrade:server_restarting" + + attr_reader :upgrader + + delegate :log, to: :upgrader + + def initialize(upgrader) + @upgrader = upgrader + end + + def workers + `pgrep -f -P #{master_pid} worker`.split("\n").map(&:to_i) + end + + def local_web_url + "http://127.0.0.1:#{ENV["UNICORN_PORT"] || 3000}/srv/status" + end + + def scale_down_workers(count) + count.times { Process.kill("TTOU", master_pid) } + end + + def scale_up_workers(count) + count.times { Process.kill("TTIN", master_pid) } + end + + def min_workers + 1 + end + + def reload + set_restart_flag + log("Restarting #{server_name} pid: #{launcher_pid}") + original_master_pid = master_pid + Process.kill("USR2", launcher_pid) + + # Wait for the original master/monitor to exit (it will spawn a new one) + iterations = 0 + while pid_exists?(original_master_pid) + iterations += 1 + break if iterations >= 60 + log("Waiting for #{server_name} to reload#{"." * iterations}") + sleep 2 + end + + # Wait for workers to be ready + iterations = 0 + while `curl -s #{local_web_url}` != "ok" + iterations += 1 + break if iterations >= 60 + log("Waiting for #{server_name} workers to start up#{"." * iterations}") + sleep 2 + end + clear_restart_flag + end + + def set_restart_flag + Discourse.redis.setex(RESTART_FLAG_KEY, 2.minutes.to_i, 1) + end + + def clear_restart_flag + Discourse.redis.del(RESTART_FLAG_KEY) + end + + private + + def pid_exists?(pid) + Process.getpgid(pid) + rescue Errno::ESRCH + false + end + end +end diff --git a/spec/lib/docker_manager/pitchfork_adapter_spec.rb b/spec/lib/docker_manager/pitchfork_adapter_spec.rb new file mode 100644 index 0000000..0e12c61 --- /dev/null +++ b/spec/lib/docker_manager/pitchfork_adapter_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "docker_manager/upgrader" + +RSpec.describe DockerManager::PitchforkAdapter do + subject(:adapter) { described_class.new(upgrader) } + + let(:upgrader) { instance_double(DockerManager::Upgrader, log: nil) } + + before { allow_any_instance_of(Kernel).to receive(:`) } + + it_behaves_like "a web server adapter" + + describe "#server_name" do + it "returns 'Pitchfork'" do + expect(adapter.server_name).to eq("Pitchfork") + end + end + + describe "#launcher_pid" do + before do + allow_any_instance_of(Kernel).to receive(:`).with("pgrep -f unicorn_launcher").and_return( + "1234\n", + ) + end + + it "returns the pid of the 'unicorn_launcher' process" do + expect(adapter.launcher_pid).to eq(1234) + end + end + + describe "#master_pid" do + before do + allow_any_instance_of(Kernel).to receive(:`).with('pgrep -f "pitchfork monitor"').and_return( + "5678\n", + ) + end + + it "returns the pid of the Pitchfork monitor process" do + expect(adapter.master_pid).to eq(5678) + end + end +end diff --git a/spec/lib/docker_manager/unicorn_adapter_spec.rb b/spec/lib/docker_manager/unicorn_adapter_spec.rb new file mode 100644 index 0000000..3cd4ca1 --- /dev/null +++ b/spec/lib/docker_manager/unicorn_adapter_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "docker_manager/upgrader" + +RSpec.describe DockerManager::UnicornAdapter do + subject(:adapter) { described_class.new(upgrader) } + + let(:upgrader) { instance_double(DockerManager::Upgrader, log: nil) } + + before { allow_any_instance_of(Kernel).to receive(:`) } + + it_behaves_like "a web server adapter" + + describe "#server_name" do + it "returns 'Unicorn'" do + expect(adapter.server_name).to eq("Unicorn") + end + end + + describe "#launcher_pid" do + before do + allow_any_instance_of(Kernel).to receive(:`).with("pgrep -f unicorn_launcher").and_return( + "1234\n", + ) + end + + it "returns the pid of the 'unicorn_launcher' process" do + expect(adapter.launcher_pid).to eq(1234) + end + end + + describe "#master_pid" do + before do + allow_any_instance_of(Kernel).to receive(:`).with('pgrep -f "unicorn master -E"').and_return( + "5678\n", + ) + end + + it "returns the pid of the Unicorn master process" do + expect(adapter.master_pid).to eq(5678) + end + end +end diff --git a/spec/lib/docker_manager/upgrader_spec.rb b/spec/lib/docker_manager/upgrader_spec.rb new file mode 100644 index 0000000..d87aff3 --- /dev/null +++ b/spec/lib/docker_manager/upgrader_spec.rb @@ -0,0 +1,256 @@ +# frozen_string_literal: true + +require "docker_manager/git_repo" +require "docker_manager/upgrader" + +RSpec.describe DockerManager::Upgrader do + subject(:upgrader) { described_class.new(user.id, repo, from_version) } + + fab!(:user, :admin) + + let(:repo_path) { Rails.root.to_s } + let(:from_version) { "2025.12" } + let(:repo) do + instance_double( + DockerManager::GitRepo, + path: repo_path, + name: "discourse", + start_upgrading: true, + stop_upgrading: true, + latest_local_commit: "2025.12", + tracking_ref: "origin/main", + upstream_branch: "origin/main", + has_local_main?: true, + detached_head?: false, + ) + end + + describe "#web_server" do + context "when unicorn is running" do + before do + allow_any_instance_of(Kernel).to receive(:`).with("pgrep -f '^unicorn[^_]'").and_return( + "1234", + ) + end + + it "uses UnicornAdapter" do + expect(upgrader.web_server).to be_a_kind_of(DockerManager::UnicornAdapter) + end + end + + context "when pitchfork is running" do + before do + allow_any_instance_of(Kernel).to receive(:`).with("pgrep -f '^unicorn[^_]'").and_return("") + end + + it "uses PitchforkAdapter" do + expect(upgrader.web_server).to be_a_kind_of(DockerManager::PitchforkAdapter) + end + end + end + + describe "#upgrade" do + let(:launcher_pid) { 1000 } + let(:master_pid) { 1001 } + let(:workers) { [2001, 2002, 2003] } + + before do + allow_any_instance_of(Kernel).to receive(:`).with(/pgrep/).and_return("") + allow(Open3).to receive(:popen2e).and_yield([], [], OpenStruct.new(value: 0)) + allow(upgrader.web_server).to receive_messages( + launcher_pid:, + master_pid:, + workers:, + scale_down_workers: nil, + reload: nil, + clear_restart_flag: nil, + scale_up_workers: nil, + ) + end + + context "when a repo fails to start upgrading" do + before { allow(repo).to receive(:start_upgrading).and_return(false) } + + it "aborts the upgrade" do + expect { upgrader.upgrade }.not_to change { Discourse.redis.get(upgrader.logs_key) } + end + end + + context "when not enough workers are running" do + let(:workers) { [2001] } + + it "raises an error" do + expect { upgrader.upgrade }.to raise_error("Not enough workers") + end + end + + context "when launcher process is missing" do + let(:launcher_pid) { 0 } + + it "raises an error" do + expect { upgrader.upgrade }.to raise_error("No Pitchfork master or launcher") + end + end + + context "when master process is missing" do + let(:master_pid) { 0 } + + it "raises an error" do + expect { upgrader.upgrade }.to raise_error("No Pitchfork master or launcher") + end + end + + context "with valid configuration" do + it "scales down workers" do + upgrader.upgrade + expect(upgrader.web_server).to have_received(:scale_down_workers).with(2) + end + + it "reloads the web server to free memory" do + upgrader.upgrade + expect(upgrader.web_server).to have_received(:reload).twice + end + + it "runs git fetch and reset commands" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with(a_kind_of(Hash), /git fetch/).ordered + expect(Open3).to have_received(:popen2e).with(a_kind_of(Hash), /git reset --hard/).ordered + end + + it "runs bundle install" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /bundle install --retry 3 --jobs 4/, + ) + end + + it "runs database migrations" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /SKIP_POST_DEPLOYMENT_MIGRATIONS=1 bundle exec rake multisite:migrate/, + ).ordered + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /&& bundle exec rake multisite:migrate/, + ).ordered + end + + it "compiles assets" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /bundle exec rake themes:update assets:precompile/, + ) + end + + it "logs the version upgrade" do + expect { upgrader.upgrade }.to change { + UserHistory.where(custom_type: "discourse_update").count + }.by(1) + end + + it "sets status to complete when done" do + expect { upgrader.upgrade }.to change { upgrader.last_status }.to "complete" + end + + it "clears the restart flag" do + upgrader.upgrade + expect(upgrader.web_server).to have_received(:clear_restart_flag) + end + end + + context "when handling branch rename from master to main" do + before do + allow(repo).to receive_messages( + upstream_branch: "origin/master", + tracking_ref: "origin/main", + ) + end + + context "when local main branch exists" do + before { allow(repo).to receive(:has_local_main?).and_return(true) } + + it "checks out main branch" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with(a_kind_of(Hash), /git checkout main/) + end + end + + context "when local main branch does not exist" do + before { allow(repo).to receive(:has_local_main?).and_return(false) } + + it "renames master to main" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /git branch -m master main/, + ) + end + end + end + + context "when repo is in detached head state" do + before { allow(repo).to receive(:detached_head?).and_return(true) } + + it "checks out the tracking ref directly" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /git -c advice.detachedHead=false checkout/, + ) + end + end + + context "when S3 assets are configured" do + before do + ENV.merge!( + "DISCOURSE_USE_S3" => "true", + "DISCOURSE_S3_BUCKET" => "my-bucket", + "DISCOURSE_S3_CDN_URL" => "https://cdn.example.com", + ) + end + + it "uploads assets to S3" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /bundle exec rake s3:upload_assets/, + ) + end + + it "expires missing assets from S3" do + upgrader.upgrade + expect(Open3).to have_received(:popen2e).with( + a_kind_of(Hash), + /bundle exec rake s3:expire_missing_assets/, + ) + end + end + + context "when an error occurs during upgrade" do + before do + allow(Open3).to receive(:popen2e).with( + a_kind_of(Hash), + /bundle install --retry 3 --jobs 4/, + ).and_raise(RuntimeError) + end + + it "sets status to failed" do + expect { upgrader.upgrade }.to raise_error(RuntimeError) + expect(upgrader.last_status).to eq("failed") + end + + it "stops upgrading for all repos" do + expect { upgrader.upgrade }.to raise_error(RuntimeError) + expect(repo).to have_received(:stop_upgrading) + end + + it "scales workers back up" do + expect { upgrader.upgrade }.to raise_error(RuntimeError) + expect(upgrader.web_server).to have_received(:scale_up_workers).with(2) + end + end + end +end diff --git a/spec/plugin_helper.rb b/spec/plugin_helper.rb new file mode 100644 index 0000000..0a1defd --- /dev/null +++ b/spec/plugin_helper.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +Pathname.new(__dir__).glob("support/**/*.rb").each { |f| require f } diff --git a/spec/support/shared_examples/web_server_adapter.rb b/spec/support/shared_examples/web_server_adapter.rb new file mode 100644 index 0000000..0632b4f --- /dev/null +++ b/spec/support/shared_examples/web_server_adapter.rb @@ -0,0 +1,151 @@ +# frozen_string_literal: true + +RSpec.shared_examples "a web server adapter" do + subject(:adapter) { described_class.new(upgrader) } + + let(:upgrader) { instance_double(DockerManager::Upgrader, log: nil) } + + describe "#workers" do + before do + allow(adapter).to receive(:master_pid).and_return(1001) + allow_any_instance_of(Kernel).to receive(:`).with("pgrep -f -P 1001 worker").and_return( + "2001\n2002\n2003\n", + ) + end + + it "returns array of worker PIDs" do + expect(adapter.workers).to contain_exactly(2001, 2002, 2003) + end + end + + describe "#min_workers" do + it "returns at least 1" do + expect(adapter.min_workers).to be >= 1 + end + end + + describe "#local_web_url" do + context "when UNICORN_PORT is set" do + before { allow(ENV).to receive(:[]).with("UNICORN_PORT").and_return("8080") } + + it "uses the configured port" do + expect(adapter.local_web_url).to eq("http://127.0.0.1:8080/srv/status") + end + end + + context "when UNICORN_PORT is not set" do + it "defaults to port 3000" do + expect(adapter.local_web_url).to eq("http://127.0.0.1:3000/srv/status") + end + end + end + + describe "#scale_down_workers" do + let(:master_pid) { 1234 } + + before do + allow(adapter).to receive(:master_pid).and_return(master_pid) + allow(Process).to receive(:kill) + end + + it "sends TTOU signal to master for each worker to scale down" do + adapter.scale_down_workers(3) + expect(Process).to have_received(:kill).with("TTOU", master_pid).thrice + end + end + + describe "#scale_up_workers" do + let(:master_pid) { 1234 } + + before do + allow(adapter).to receive(:master_pid).and_return(master_pid) + allow(Process).to receive(:kill) + end + + it "sends TTIN signal to master for each worker to scale up" do + adapter.scale_up_workers(2) + expect(Process).to have_received(:kill).with("TTIN", master_pid).twice + end + end + + describe "#set_restart_flag" do + it "sets the server restart flag through redis" do + expect { adapter.set_restart_flag }.to change { + Discourse.redis.get(DockerManager::WebServerAdapter::RESTART_FLAG_KEY) + }.to("1") + end + end + + describe "#clear_restart_flag" do + before { adapter.set_restart_flag } + + it "deletes the server restart flag in redis" do + expect { adapter.clear_restart_flag }.to change { + Discourse.redis.get(DockerManager::WebServerAdapter::RESTART_FLAG_KEY) + }.to be_nil + end + end + + describe "#reload" do + let(:launcher_pid) { 1000 } + let(:master_pid) { 1001 } + let(:server_name) { adapter.server_name } + + before do + allow(adapter).to receive_messages( + launcher_pid:, + master_pid:, + set_restart_flag: "OK", + clear_restart_flag: nil, + ) + allow(adapter).to receive(:sleep) + allow(adapter).to receive(:`).and_return("ok") + allow(Process).to receive(:kill) + allow(Process).to receive(:getpgid).and_raise(Errno::ESRCH) + end + + it "sets the restart flag before reloading" do + adapter.reload + expect(adapter).to have_received(:set_restart_flag).ordered + expect(Process).to have_received(:kill).with("USR2", launcher_pid).ordered + end + + it "logs the restart action" do + adapter.reload + expect(upgrader).to have_received(:log).with("Restarting #{server_name} pid: #{launcher_pid}") + end + + it "sends USR2 signal to launcher" do + adapter.reload + expect(Process).to have_received(:kill).with("USR2", launcher_pid) + end + + it "waits for original master to exit" do + call_count = 0 + allow(Process).to receive(:getpgid).with(master_pid) do + call_count += 1 + raise Errno::ESRCH if call_count > 2 + true + end + + adapter.reload + expect(adapter).to have_received(:sleep).with(2).twice + end + + it "waits for workers to respond to health check" do + call_count = 0 + allow(adapter).to receive(:`).with(/curl/) do + call_count += 1 + call_count > 3 ? "ok" : "not ready" + end + + adapter.reload + expect(adapter).to have_received(:sleep).with(2).thrice + end + + it "clears the restart flag after successful reload" do + adapter.reload + expect(adapter).to have_received(:clear_restart_flag) + end + end +end