From 615febbb1511da4cfde2f77f9042130b8bdca625 Mon Sep 17 00:00:00 2001 From: Guillaume Armede Date: Thu, 10 Apr 2025 09:08:26 +0200 Subject: [PATCH 1/7] Add exclude for last_vacuum and last_analyze --- README | 18 ++++++++ check_pgactivity | 108 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 105 insertions(+), 21 deletions(-) diff --git a/README b/README index c8c1bda..1c5fb81 100644 --- a/README +++ b/README @@ -725,6 +725,15 @@ COMPATIBILITY This service supports both "--dbexclude" and "--dbinclude" parameters. The 'postgres' database and templates are always excluded. + + It also supports a "--exclude REGEX" parameter to exclude relations + matching a regular expression. The regular expression applies to + "database.schema_name.relation_name". This enables you to filter + either on a relation name for all schemas and databases, on a + qualified named relation (schema + relation) for all databases or on + a qualified named relation in only one database. + + You can use multiple "--exclude REGEX" parameters. Required privileges: unprivileged role able to log in all databases. @@ -756,6 +765,15 @@ COMPATIBILITY parameters. The 'postgres' database and templates are always excluded. + It also supports a "--exclude REGEX" parameter to exclude relations + matching a regular expression. The regular expression applies to + "database.schema_name.relation_name". This enables you to filter + either on a relation name for all schemas and databases, on a + qualified named relation (schema + relation) for all databases or on + a qualified named relation in only one database. + + You can use multiple "--exclude REGEX" parameters. + Required privileges: unprivileged role able to log in all databases. locks (all) diff --git a/check_pgactivity b/check_pgactivity index 9199968..a747285 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -358,6 +358,7 @@ Repeat this option as many time as needed. See C<--dbinclude> as well. 
If a database match both dbexclude and dbinclude arguments, it is excluded. + =item B<--dbinclude> REGEXP Some services automatically check all the databases of your @@ -512,6 +513,8 @@ my %args = ( 'format' => 'nagios', 'uid' => undef, 'with-hugepages' => undef + 'schexclude' => undef, + 'relexclude' => undef ); # Set name of the program without path* @@ -5149,7 +5152,7 @@ sub check_is_replay_paused { # Agnostic check vacuum or analyze sub # FIXME: we can certainly do better about temp tables sub check_last_maintenance { - my $rs; + my @rs; my $c_limit; my $w_limit; my @perfdata; @@ -5182,12 +5185,16 @@ sub check_last_maintenance { $w_limit = get_time $args{'warning'}; my %queries = ( - # 1st field: oldest known maintenance on a table + # 1st field: is the instance in recovery + # 2nd field: current database + # 3nd field: namespace + # 4nd filed: relation + # 5th field: oldest known maintenance on a table # -inf if a table never had maintenance - # NaN if nothing found or secondary - # 2nd field: total number of maintenance - # 3nd field: total number of auto-maintenance - # 4th field: hash(insert||update||delete||thresholds) to avoid + # NaN if nothing found + # 6th field: total number of maintenance + # 7th field: total number of auto-maintenance + # 8th field: hash(insert||update||delete||thresholds) to avoid # useless alerts when there is no write activity (if # thresholds do not change) # @@ -5196,6 +5203,9 @@ sub check_last_maintenance { $PG_VERSION_82 => qq{ SELECT false AS is_in_recovery, + current_database(), + schemaname, + relname, coalesce(max( coalesce(extract(epoch FROM current_timestamp - @@ -5209,12 +5219,17 @@ sub check_last_maintenance { ||'$c_limit $w_limit')) FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' + AND n_live_tup > 0 + GROUP BY schemaname,relname }, # Starting with 8.3, we can check database activity from # pg_stat_database $PG_VERSION_83 => qq{ SELECT false AS is_in_recovery, + current_database(), + schemaname, + 
relname, coalesce(max( coalesce(extract(epoch FROM current_timestamp - @@ -5230,12 +5245,17 @@ sub check_last_maintenance { FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' AND schemaname NOT LIKE 'pg_toast_temp_%' + AND n_live_tup > 0 + GROUP BY schemaname,relname }, # Starting with 9.0, we can check database status # (primary or secondary) $PG_VERSION_90 => qq{ SELECT pg_is_in_recovery()::int AS is_in_recovery, + current_database(), + schemaname, + relname, CASE WHEN NOT pg_is_in_recovery() THEN coalesce(max( coalesce(extract(epoch FROM @@ -5253,11 +5273,16 @@ sub check_last_maintenance { FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' AND schemaname NOT LIKE 'pg_toast_temp_%' + AND n_live_tup > 0 + GROUP BY schemaname,relname }, # Starting with 9.1, we can add the analyze/vacuum counts $PG_VERSION_91 => qq{ SELECT pg_is_in_recovery()::int AS is_in_recovery, + current_database(), + schemaname, + relname, CASE WHEN NOT pg_is_in_recovery() THEN coalesce(max( coalesce(extract(epoch FROM @@ -5288,6 +5313,8 @@ sub check_last_maintenance { AND schemaname NOT LIKE 'pg_toast_temp_%' AND (('${type}' = 'vacuum' AND relkind <> 'p') -- partitioned table do not have last_* information OR ('${type}' = 'analyze')) + AND n_live_tup > 0 + GROUP BY schemaname,relname } ); @@ -5315,30 +5342,50 @@ sub check_last_maintenance { LOOP_DB: foreach my $db (@all_db) { my @perf; - my $rs; - - next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; + my @rs; + my $maintenance; + my $maintenance_count = 0 ; + my $maintenance_count_auto = 0 ; + my $maintenance_max = -1 ; + my $maintenance_hash = 'null' ; + + next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; next LOOP_DB if @dbinclude and not grep { $db =~ /$_/ } @dbinclude; $dbchecked++; - $rs = query_ver( $hosts[0], %queries, $db )->[0]; - + @rs = @{ query_ver( $hosts[0], %queries, $db ) } ; $db =~ s/=//g; return status_unknown( $me, [ "Server is no primary." 
] ) if $rs->[0]; - push @perfdata => [ $db, $rs->[1], 's', $w_limit, $c_limit ]; - $new_counts{$db} = [ $rs->[2], $rs->[3] ]; + + MAINTENANCE_LOOP: foreach my $maintenance (@rs) { + + foreach my $exclude_re ( @{ $args{'exclude'} } ) { + next MAINTENANCE_LOOP if "$maintenance->[0].$maintenance->[1].$maintenance->[2]" =~ m/$exclude_re/; + } + $maintenance_count += $maintenance->[4] ; + $maintenance_count_auto += $maintenance->[5] ; + $maintenance_hash = $maintenance->[6] ; + if ( $maintenance->[3] gt $maintenance_max ) { + # Getting the MAX + $maintenance_max = $maintenance->[3] ; + } + + } + push @perfdata => [ $db, $maintenance_max, 's', $w_limit, $c_limit ]; + + $new_counts{$db} = [ $maintenance_count, $maintenance_count_auto ]; if ( exists $counts{$db} ) { if ($hosts[0]->{'version_num'} >= $PG_VERSION_91 ) { - my $delta = $rs->[2] - $counts{$db}[0]; - my $delta_auto = $rs->[3] - $counts{$db}[1]; + my $delta = $maintenance_count - $counts{$db}[0]; + my $delta_auto = $maintenance_count_auto - $counts{$db}[1]; push @perfdata => ( [ "$db $type", $delta ], @@ -5347,25 +5394,26 @@ LOOP_DB: foreach my $db (@all_db) { } # avoid alerts if no write activity since last call - if ( defined $counts{$db}[2] and $counts{$db}[2] eq $rs->[3] ) { + if ( defined $counts{$db}[2] and $counts{$db}[2] eq $maintenance_hash ) { # keep old hashed status for this database $new_counts{$db}[2] = $counts{$db}[2]; next LOOP_DB; } } - if ( $rs->[1] >= $c_limit ) { - push @msg_crit => "$db: " . to_interval($rs->[1]); + if ( $maintenance_max >= $c_limit ) { + push @msg_crit => "$db: " . to_interval($maintenance_max); next LOOP_DB; } - if ( $rs->[1] >= $w_limit ) { - push @msg_warn => "$db: " . to_interval($rs->[1]); + if ( $maintenance_max >= $w_limit ) { + push @msg_warn => "$db: " . 
to_interval($maintenance_max); next LOOP_DB; } # iif everything is OK, save the current hashed status for this database - $new_counts{$db}[2] = $rs->[4]; + $new_counts{$db}[2] = $maintenance_hash ; + } save $hosts[0], "${type}_counts", \%new_counts, $args{'status-file'}; @@ -5406,6 +5454,15 @@ raise any alerts, unless you change a threshold. This service supports both C<--dbexclude> and C<--dbinclude> parameters. The 'postgres' database and templates are always excluded. +This service supports a C<--exclude REGEX> parameter to exclude relations +matching a regular expression. The regular expression applies to +"database.schema_name.relation_name". This enables you to filter either on a +relation name for all schemas and databases, on a qualified named relation +(schema + relation) for all databases or on a qualified named relation in +only one database. + +You can use multiple C<--exclude REGEX> parameters. + Required privileges: unprivileged role able to log in all databases. =cut @@ -5442,6 +5499,15 @@ raise any alerts, unless you change a threshold. This service supports both C<--dbexclude> and C<--dbinclude> parameters. The 'postgres' database and templates are always excluded. +This service supports a C<--exclude REGEX> parameter to exclude relations +matching a regular expression. The regular expression applies to +"database.schema_name.relation_name". This enables you to filter either on a +relation name for all schemas and databases, on a qualified named relation +(schema + relation) for all databases or on a qualified named relation in +only one database. + +You can use multiple C<--exclude REGEX> parameters. + Required privileges: unprivileged role able to log in all databases. 
=cut From 54e928f80e187790e987d2a0cbf9acd81b07ffec Mon Sep 17 00:00:00 2001 From: Guillaume Armede Date: Fri, 23 May 2025 12:12:41 +0200 Subject: [PATCH 2/7] Remove n_live_tup predicate --- check_pgactivity | 1 + 1 file changed, 1 insertion(+) diff --git a/check_pgactivity b/check_pgactivity index a747285..75b6812 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -5273,6 +5273,7 @@ sub check_last_maintenance { FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' AND schemaname NOT LIKE 'pg_toast_temp_%' +<<<<<<< HEAD AND n_live_tup > 0 GROUP BY schemaname,relname }, From e5bab181c4b40dbeab62eefbc20ce82ee3bb4f2e Mon Sep 17 00:00:00 2001 From: blo Date: Tue, 1 Jul 2025 16:17:42 +0200 Subject: [PATCH 3/7] Remove unnecessary space/tabulation and add some when needed --- check_pgactivity | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/check_pgactivity b/check_pgactivity index 75b6812..1c43c67 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -5219,7 +5219,6 @@ sub check_last_maintenance { ||'$c_limit $w_limit')) FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' - AND n_live_tup > 0 GROUP BY schemaname,relname }, # Starting with 8.3, we can check database activity from @@ -5245,7 +5244,6 @@ sub check_last_maintenance { FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' AND schemaname NOT LIKE 'pg_toast_temp_%' - AND n_live_tup > 0 GROUP BY schemaname,relname }, # Starting with 9.0, we can check database status @@ -5273,8 +5271,6 @@ sub check_last_maintenance { FROM pg_stat_user_tables WHERE schemaname NOT LIKE 'pg_temp_%' AND schemaname NOT LIKE 'pg_toast_temp_%' -<<<<<<< HEAD - AND n_live_tup > 0 GROUP BY schemaname,relname }, # Starting with 9.1, we can add the analyze/vacuum counts @@ -5314,7 +5310,6 @@ sub check_last_maintenance { AND schemaname NOT LIKE 'pg_toast_temp_%' AND (('${type}' = 'vacuum' AND relkind <> 'p') -- partitioned table do not have last_* 
information OR ('${type}' = 'analyze')) - AND n_live_tup > 0 GROUP BY schemaname,relname } ); @@ -5345,39 +5340,37 @@ LOOP_DB: foreach my $db (@all_db) { my @perf; my @rs; my $maintenance; - my $maintenance_count = 0 ; - my $maintenance_count_auto = 0 ; - my $maintenance_max = -1 ; - my $maintenance_hash = 'null' ; + my $maintenance_count = 0; + my $maintenance_count_auto = 0; + my $maintenance_max = -1; + my $maintenance_hash = 'null'; - next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; + next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; next LOOP_DB if @dbinclude and not grep { $db =~ /$_/ } @dbinclude; $dbchecked++; - @rs = @{ query_ver( $hosts[0], %queries, $db ) } ; + @rs = @{ query_ver( $hosts[0], %queries, $db ) }; $db =~ s/=//g; return status_unknown( $me, [ "Server is no primary." ] ) if $rs->[0]; + MAINTENANCE_LOOP: foreach my $maintenance (@rs) { + foreach my $exclude_re ( @{ $args{'exclude'} } ) { + next MAINTENANCE_LOOP if "$maintenance->[0].$maintenance->[1].$maintenance->[2]" =~ m/$exclude_re/; + } + $maintenance_count += $maintenance->[4]; + $maintenance_count_auto += $maintenance->[5]; + $maintenance_hash = $maintenance->[6]; + if ( $maintenance->[3] gt $maintenance_max ) { + # Getting the MAX + $maintenance_max = $maintenance->[3]; + } - MAINTENANCE_LOOP: foreach my $maintenance (@rs) { - - foreach my $exclude_re ( @{ $args{'exclude'} } ) { - next MAINTENANCE_LOOP if "$maintenance->[0].$maintenance->[1].$maintenance->[2]" =~ m/$exclude_re/; - } - $maintenance_count += $maintenance->[4] ; - $maintenance_count_auto += $maintenance->[5] ; - $maintenance_hash = $maintenance->[6] ; - if ( $maintenance->[3] gt $maintenance_max ) { - # Getting the MAX - $maintenance_max = $maintenance->[3] ; - } - - } + } push @perfdata => [ $db, $maintenance_max, 's', $w_limit, $c_limit ]; $new_counts{$db} = [ $maintenance_count, $maintenance_count_auto ]; @@ -5414,7 +5407,6 @@ LOOP_DB: foreach my $db (@all_db) { # iif everything is OK, save the current hashed status for 
this database $new_counts{$db}[2] = $maintenance_hash ; - } save $hosts[0], "${type}_counts", \%new_counts, $args{'status-file'}; From 84225f52db31803331465e14ed00d4a6b5315e1c Mon Sep 17 00:00:00 2001 From: blo Date: Tue, 1 Jul 2025 17:28:07 +0200 Subject: [PATCH 4/7] Another alignment error in the code --- check_pgactivity | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_pgactivity b/check_pgactivity index 1c43c67..b50a604 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -5345,7 +5345,7 @@ LOOP_DB: foreach my $db (@all_db) { my $maintenance_max = -1; my $maintenance_hash = 'null'; - next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; + next LOOP_DB if grep { $db =~ /$_/ } @dbexclude; next LOOP_DB if @dbinclude and not grep { $db =~ /$_/ } @dbinclude; $dbchecked++; From 7a4f226b6be5afd09ef5b1a6ddd221fcc1997e09 Mon Sep 17 00:00:00 2001 From: blo Date: Tue, 1 Jul 2025 17:28:59 +0200 Subject: [PATCH 5/7] Remove useless comment --- check_pgactivity | 1 - 1 file changed, 1 deletion(-) diff --git a/check_pgactivity b/check_pgactivity index b50a604..53f22db 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -5366,7 +5366,6 @@ LOOP_DB: foreach my $db (@all_db) { $maintenance_count_auto += $maintenance->[5]; $maintenance_hash = $maintenance->[6]; if ( $maintenance->[3] gt $maintenance_max ) { - # Getting the MAX $maintenance_max = $maintenance->[3]; } From 30e5b3334eeda5fe7237f1be60aa0aa0df5819b1 Mon Sep 17 00:00:00 2001 From: blo Date: Tue, 9 Dec 2025 11:07:03 +0100 Subject: [PATCH 6/7] Fix mistake during rebase --- check_pgactivity | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/check_pgactivity b/check_pgactivity index 53f22db..b6e43d1 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -512,7 +512,7 @@ my %args = ( 'dump-bin-file' => undef, 'format' => 'nagios', 'uid' => undef, - 'with-hugepages' => undef + 'with-hugepages' => undef, 'schexclude' => undef, 'relexclude' => undef ); @@ -5353,9 +5353,10 @@ LOOP_DB: 
foreach my $db (@all_db) { @rs = @{ query_ver( $hosts[0], %queries, $db ) }; $db =~ s/=//g; + # Note: if @rs is empty $rs[0][0] is undef and the boolean test is false return status_unknown( $me, [ "Server is no primary." ] - ) if $rs->[0]; + ) if $rs[0][0]; MAINTENANCE_LOOP: foreach my $maintenance (@rs) { From 069849022753ecca6a66c9f25c1f5d63d21eacae Mon Sep 17 00:00:00 2001 From: blo Date: Wed, 10 Dec 2025 10:18:56 +0100 Subject: [PATCH 7/7] Fix another rebase error I missed a label since pg_class and pg_stat_user_tables both have relname. I didn't shift the array access by one after merging the patch that adds a pg_is_in_recovery. I discovered autovivification, I feel dirty now. --- check_pgactivity | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/check_pgactivity b/check_pgactivity index b6e43d1..fc3fa49 100755 --- a/check_pgactivity +++ b/check_pgactivity @@ -5279,7 +5279,7 @@ sub check_last_maintenance { pg_is_in_recovery()::int AS is_in_recovery, current_database(), schemaname, - relname, + a.relname, CASE WHEN NOT pg_is_in_recovery() THEN coalesce(max( coalesce(extract(epoch FROM @@ -5310,7 +5310,7 @@ sub check_last_maintenance { AND schemaname NOT LIKE 'pg_toast_temp_%' AND (('${type}' = 'vacuum' AND relkind <> 'p') -- partitioned table do not have last_* information OR ('${type}' = 'analyze')) - GROUP BY schemaname,relname + GROUP BY schemaname, a.relname } ); @@ -5354,20 +5354,22 @@ LOOP_DB: foreach my $db (@all_db) { $db =~ s/=//g; # Note: if @rs is empty $rs[0][0] is undef and the boolean test is false + # we check that @rs has rows first, otherwise $rs[0][0] would autovivify $rs[0] return status_unknown( $me, [ "Server is no primary." 
] - ) if $rs[0][0]; + ) if scalar @rs and $rs[0][0]; MAINTENANCE_LOOP: foreach my $maintenance (@rs) { foreach my $exclude_re ( @{ $args{'exclude'} } ) { - next MAINTENANCE_LOOP if "$maintenance->[0].$maintenance->[1].$maintenance->[2]" =~ m/$exclude_re/; + next MAINTENANCE_LOOP if "$maintenance->[1].$maintenance->[2].$maintenance->[3]" =~ m/$exclude_re/; } - $maintenance_count += $maintenance->[4]; - $maintenance_count_auto += $maintenance->[5]; - $maintenance_hash = $maintenance->[6]; - if ( $maintenance->[3] gt $maintenance_max ) { - $maintenance_max = $maintenance->[3]; + + $maintenance_count += $maintenance->[5]; + $maintenance_count_auto += $maintenance->[6]; + $maintenance_hash = $maintenance->[7]; + if ( $maintenance->[4] gt $maintenance_max ) { + $maintenance_max = $maintenance->[4]; } }