Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions group_vars/production/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ le_endpoint: https://acme-v02.api.letsencrypt.org/directory
alt_server_name: "www.unglue.it"
deploy_type: 'prod'

# IPs and CIDRs blocked at Apache level before requests reach Django/WSGI.
# UA-based blocking is also in bot-block.conf (SetEnvIfNoCase) and in Django's
# BotBlockingMiddleware — the Apache layer fires first, saving WSGI slots.

# Entire network ranges to block (distributed bots that rotate IPs within a /16):
blocked_cidrs:
  # Tencent Cloud — distributed bot, 1,495 IPs on 2026-02-26
  - "43.173.0.0/16"

# Individual IPs for single-host offenders:
blocked_ips:
  # ClaudeBot (Anthropic) — 229K req/day 2026-02-26
  - "216.73.216.178"

### Variables in settings.prod.py ###
mysql_db_name: "{{ vault_mysql_db_name }}"
mysql_db_user: "{{ vault_mysql_db_user }}"
Expand Down
19 changes: 18 additions & 1 deletion roles/regluit_prod/tasks/apache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,24 @@
# Enable the ssl, rewrite and headers Apache modules.
# NOTE(review): a bare `command` always reports "changed", so the handler
# below is notified on every run — confirm whether that is relied upon
# before making this task idempotent.
- name: Enable SSL rewrite headers
  become: true
  command: a2enmod ssl rewrite headers
  # Exactly one `notify` key: a duplicated mapping key is invalid YAML and
  # most parsers silently keep only the last occurrence.
  notify:
    - restart apache

# Render the bot-block.conf.j2 template into Apache's conf-available
# directory and notify the Apache restart handler when the file changes.
- name: Deploy bot-block conf
  become: true
  template:
    src: bot-block.conf.j2
    dest: /etc/apache2/conf-available/bot-block.conf
    owner: root
    group: root
    # Quoted so the module receives the string "0644" rather than a
    # YAML-interpreted integer.
    mode: "0644"
  notify:
    - restart apache

# Enable the bot-block conf deployed to conf-available.
- name: Enable bot-block conf
  become: true
  command: a2enconf bot-block
  args:
    # Skip the command (and report "ok" instead of "changed") once the
    # conf is already enabled. a2enconf on Debian/Ubuntu enables a conf by
    # linking it into conf-enabled.
    creates: /etc/apache2/conf-enabled/bot-block.conf
  notify:
    - restart apache

- name: Generate static files
Expand Down
17 changes: 14 additions & 3 deletions roles/regluit_prod/tasks/cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
# See: https://github.com/Gluejar/regluit/issues/1078 (bot traffic)

# Workaround for memory pressure under heavy bot traffic
# TODO: Remove once proper bot mitigation is in place (see issue #1078)
# Bot mitigation (bot-block.conf, BotBlockingMiddleware) is now live on prod (2026-02-27).
# TODO: Monitor load over coming days; if load stays low, remove this restart workaround
# and file a provisioning issue to remove the cron entry via Ansible.
- name: Restart Apache periodically (workaround for bot load)
become: yes
ansible.builtin.cron:
Expand All @@ -14,11 +16,20 @@
job: "service apache2 restart"
user: root

- name: Clean up old Apache logs (older than 14 days)
# Root cron entry, daily at 03:00: compress plain `*.log` files under
# /var/log/apache2 that match `-mtime +1`.
# NOTE(review): find counts age in whole 24-hour periods, so `-mtime +1`
# appears to select files last modified at least two days ago — confirm
# this matches the "older than 1 day" intent.
- name: Gzip Apache logs older than 1 day
  become: true
  ansible.builtin.cron:
    name: "apache-log-gzip"
    user: root
    hour: "3"
    minute: "0"
    job: >-
      find /var/log/apache2 -name '*.log' -mtime +1 -exec gzip {} \;

# Root cron entry, daily at 03:00: delete compressed Apache logs
# (`*.log.gz`) under /var/log/apache2 that match `-mtime +30`.
- name: Delete gzipped Apache logs older than 30 days
  become: true
  ansible.builtin.cron:
    name: "apache-log-cleanup"
    minute: "0"
    hour: "3"
    # Single `job` key: only compressed logs are removed. A second, stale
    # `job` entry targeting uncompressed '*.log' files would be a duplicate
    # mapping key, which YAML forbids.
    job: "find /var/log/apache2 -name '*.log.gz' -mtime +30 -delete"
    user: root
52 changes: 52 additions & 0 deletions roles/regluit_prod/templates/bot-block.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# bot-block.conf — Apache-level bot blocking (pre-WSGI, zero Django cost)
# Managed by Ansible. Do not edit this file manually on the server.
#
# Two mechanisms:
# 1. UA-based: known bot UA strings → rejected before mod_wsgi spawns a thread
# 2. IP/CIDR: egregious single-IP offenders and known bad network ranges
#
# To add/remove IPs: edit blocked_ips or blocked_cidrs in
# group_vars/<env>/vars.yml and re-run the Ansible playbook.
# To add UAs: edit this template and re-run.

# --- UA-based blocking (pre-WSGI) ---
# Mirrors the BAD_ROBOTS list in Django's BotBlockingMiddleware.
# Apache rejects these before mod_wsgi is involved, saving all 30 WSGI slots.
# Each pattern below is matched case-insensitively against the User-Agent
# header; a match sets the bad_bot environment variable for the request.
{% set bad_bot_agents = [
  "gptbot",
  "chatgpt-user",
  "oai-searchbot",
  "claudebot",
  "anthropic-ai",
  "claude-web",
  "perplexitybot",
  "perplexity-user",
  "amazonbot",
  "meta-externalagent",
  "meta-webindexer",
  "facebookbot",
  "ccbot",
  "diffbot",
  "bytespider",
  "cohere-ai",
  "timpibot",
  "imagesiftbot",
  "dataforseo",
  "QIHU 360SE",
  "MetaSr",
] %}
{% for agent in bad_bot_agents %}
SetEnvIfNoCase User-Agent "{{ agent }}" bad_bot
{% endfor %}

# --- Access rules applied to every URL ---
# Deny requests flagged as bad_bot and requests from any address listed in
# blocked_cidrs / blocked_ips; everything else is left to the rest of the
# server's authorization configuration.
<Location />
    # Combine these rules with the authorization configured in earlier
    # sections instead of replacing it. With the default (AuthMerging Off)
    # the "Require all granted" below would override stricter
    # <Directory>/<Files> rules for the same request.
    AuthMerging And
    <RequireAll>
        # RequireAll needs at least one non-negated Require to succeed.
        Require all granted
        Require not env bad_bot
{# default([], true) treats undefined, null and empty values alike, so an
   empty `blocked_cidrs:` / `blocked_ips:` key renders no rules. #}
{% for cidr in blocked_cidrs | default([], true) %}
        Require not ip {{ cidr }}
{% endfor %}
{% for ip in blocked_ips | default([], true) %}
        Require not ip {{ ip }}
{% endfor %}
    </RequireAll>
</Location>