From 260c391707143316b3fa0385c40f7c0fb1e16838 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Mon, 23 Mar 2026 20:20:13 +0000 Subject: [PATCH] perf: denormalize post_status into event_dates to eliminate posts table JOIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add post_status column to datamachine_event_dates table with composite index (post_status, start_datetime). This allows date-filtered queries to skip the 130K-row posts table entirely. - EventDatesTable: add post_status column to schema, upsert, and backfill - meta-storage: sync post_status on transition_post_status hook - DateFilter: add post_status filter and join_column params to SQL helpers - Taxonomy_Helper: skip posts JOIN when date filter is active - UpcomingCountAbilities: skip posts JOIN, use ed.post_status Benchmarks on 37K events: - Location term counts: 2.9s → 107ms (27x faster) - Cross-filter counts: 3.7s → 174ms (21x faster) --- inc/Abilities/UpcomingCountAbilities.php | 15 +++-- inc/Blocks/Calendar/Query/DateFilter.php | 31 ++++++++--- inc/Blocks/Calendar/Taxonomy_Helper.php | 70 ++++++++++++++++-------- inc/Core/EventDatesTable.php | 45 +++++++++++++-- inc/Core/meta-storage.php | 23 ++++++++ 5 files changed, 138 insertions(+), 46 deletions(-) diff --git a/inc/Abilities/UpcomingCountAbilities.php b/inc/Abilities/UpcomingCountAbilities.php index e2949df..2823e6d 100644 --- a/inc/Abilities/UpcomingCountAbilities.php +++ b/inc/Abilities/UpcomingCountAbilities.php @@ -5,9 +5,9 @@ * Counts upcoming events grouped by taxonomy term. This is the raw data * primitive powering homepage badges, cross-site links, and market reports. * - * The query joins postmeta (_datamachine_event_datetime >= today) to filter - * only future events, then GROUP BY term for counts. On 35K+ events this - * takes ~7s, so consumers should always cache the results. 
+ * The query joins event_dates (start_datetime >= today, post_status = 'publish') + * to filter only future published events, then GROUP BY term for counts. + * Skips the posts table entirely via denormalized post_status column. * * @package DataMachineEvents\Abilities */ @@ -122,18 +122,17 @@ public function executeGetUpcomingCounts( array $input ): array { $parent_clause = $exclude_roots ? 'AND tt.parent != 0' : ''; + // Uses ed.post_status to avoid joining the posts table (3s → <100ms). // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery,WordPress.DB.DirectDatabaseQuery.NoCaching,WordPress.DB.PreparedSQL.InterpolatedNotPrepared $rows = $wpdb->get_results( $wpdb->prepare( - "SELECT t.term_id, t.name, t.slug, COUNT(DISTINCT p.ID) AS event_count + "SELECT t.term_id, t.name, t.slug, COUNT(DISTINCT tr.object_id) AS event_count FROM {$wpdb->term_relationships} tr INNER JOIN {$wpdb->term_taxonomy} tt ON tr.term_taxonomy_id = tt.term_taxonomy_id INNER JOIN {$wpdb->terms} t ON tt.term_id = t.term_id - INNER JOIN {$wpdb->posts} p ON tr.object_id = p.ID - INNER JOIN {$ed_table} ed ON p.ID = ed.post_id + INNER JOIN {$ed_table} ed ON tr.object_id = ed.post_id WHERE tt.taxonomy = %s - AND p.post_type = 'data_machine_events' - AND p.post_status = 'publish' + AND ed.post_status = 'publish' AND ed.start_datetime >= %s {$parent_clause} GROUP BY t.term_id diff --git a/inc/Blocks/Calendar/Query/DateFilter.php b/inc/Blocks/Calendar/Query/DateFilter.php index 32229a2..c92250a 100644 --- a/inc/Blocks/Calendar/Query/DateFilter.php +++ b/inc/Blocks/Calendar/Query/DateFilter.php @@ -119,14 +119,21 @@ public static function apply_date_orderby( string $direction = 'ASC' ): callable /** * Raw SQL fragments for upcoming events. * + * Uses ed.post_status = 'publish' to avoid joining the posts table. + * Set $include_status to false if the caller already joins posts and + * filters post_status there (e.g. WP_Query consumers). 
+ * + * @param bool $include_status Whether to include post_status filter. Default true. + * @param string $join_column SQL column joined to ed.post_id; interpolated into the query unescaped, so pass only trusted identifiers. Default 'p.ID'. * @return array{joins: string, where: string, param_count: int} */ - public static function upcoming_sql(): array { + public static function upcoming_sql( bool $include_status = true, string $join_column = 'p.ID' ): array { $table = EventDatesTable::table_name(); + $status_clause = $include_status ? " AND ed.post_status = 'publish'" : ''; return array( - 'joins' => "INNER JOIN {$table} ed ON p.ID = ed.post_id", - 'where' => '(ed.start_datetime >= %s OR ed.end_datetime >= %s)', + 'joins' => "INNER JOIN {$table} ed ON {$join_column} = ed.post_id", + 'where' => "(ed.start_datetime >= %s OR ed.end_datetime >= %s){$status_clause}", 'param_count' => 2, ); } @@ -134,14 +141,17 @@ public static function upcoming_sql(): array { /** * Raw SQL fragments for past events. * + * @param bool $include_status Whether to include post_status filter. Default true. + * @param string $join_column SQL column joined to ed.post_id; interpolated into the query unescaped, so pass only trusted identifiers. Default 'p.ID'. * @return array{joins: string, where: string, param_count: int} */ - public static function past_sql(): array { + public static function past_sql( bool $include_status = true, string $join_column = 'p.ID' ): array { $table = EventDatesTable::table_name(); + $status_clause = $include_status ? " AND ed.post_status = 'publish'" : ''; return array( - 'joins' => "INNER JOIN {$table} ed ON p.ID = ed.post_id", - 'where' => '(ed.start_datetime < %s AND (ed.end_datetime < %s OR ed.end_datetime IS NULL))', + 'joins' => "INNER JOIN {$table} ed ON {$join_column} = ed.post_id", + 'where' => "(ed.start_datetime < %s AND (ed.end_datetime < %s OR ed.end_datetime IS NULL)){$status_clause}", 'param_count' => 2, ); } @@ -149,14 +159,17 @@ public static function past_sql(): array { /** * Raw SQL fragments for a date range filter. * + * @param bool $include_status Whether to include post_status filter. 
Default true. + * @param string $join_column SQL column joined to ed.post_id; interpolated into the query unescaped, so pass only trusted identifiers. Default 'p.ID'. * @return array{joins: string, where: string, param_count: int} */ - public static function date_range_sql(): array { + public static function date_range_sql( bool $include_status = true, string $join_column = 'p.ID' ): array { $table = EventDatesTable::table_name(); + $status_clause = $include_status ? " AND ed.post_status = 'publish'" : ''; return array( - 'joins' => "INNER JOIN {$table} ed ON p.ID = ed.post_id", - 'where' => '(ed.start_datetime >= %s AND ed.start_datetime <= %s)', + 'joins' => "INNER JOIN {$table} ed ON {$join_column} = ed.post_id", + 'where' => "(ed.start_datetime >= %s AND ed.start_datetime <= %s){$status_clause}", 'param_count' => 2, ); } diff --git a/inc/Blocks/Calendar/Taxonomy_Helper.php b/inc/Blocks/Calendar/Taxonomy_Helper.php index d791ff3..de36402 100644 --- a/inc/Blocks/Calendar/Taxonomy_Helper.php +++ b/inc/Blocks/Calendar/Taxonomy_Helper.php @@ -147,11 +147,10 @@ function ( $term ) { public static function get_batch_term_counts( $taxonomy_slug, $date_context = array(), $active_filters = array(), $tax_query_override = null ) { global $wpdb; - $post_type = Event_Post_Type::POST_TYPE; - $joins = ''; $where_clauses = ''; - $params = array( $taxonomy_slug, $post_type ); + $params = array( $taxonomy_slug ); + $has_date_filter = false; if ( ! empty( $date_context ) ) { $date_start = $date_context['date_start'] ?? ''; @@ -160,23 +159,26 @@ public static function get_batch_term_counts( $taxonomy_slug, $date_context = ar $current_datetime = current_time( 'mysql' ); if ( ! empty( $date_start ) && ! empty( $date_end ) ) { - $filter = DateFilter::date_range_sql(); + $filter = DateFilter::date_range_sql( true, 'tr.object_id' ); $joins .= ' ' . $filter['joins']; $where_clauses .= ' AND ' . $filter['where']; $params[] = $date_start . ' 00:00:00'; $params[] = $date_end . 
' 23:59:59'; + $has_date_filter = true; } elseif ( $show_past ) { - $filter = DateFilter::past_sql(); + $filter = DateFilter::past_sql( true, 'tr.object_id' ); $joins .= ' ' . $filter['joins']; $where_clauses .= ' AND ' . $filter['where']; $params[] = $current_datetime; $params[] = $current_datetime; + $has_date_filter = true; } else { - $filter = DateFilter::upcoming_sql(); + $filter = DateFilter::upcoming_sql( true, 'tr.object_id' ); $joins .= ' ' . $filter['joins']; $where_clauses .= ' AND ' . $filter['where']; $params[] = $current_datetime; $params[] = $current_datetime; + $has_date_filter = true; } } @@ -194,7 +196,7 @@ public static function get_batch_term_counts( $taxonomy_slug, $date_context = ar $alias_tr = "base_tr_{$base_join_index}"; $alias_tt = "base_tt_{$base_join_index}"; - $joins .= " INNER JOIN {$wpdb->term_relationships} {$alias_tr} ON p.ID = {$alias_tr}.object_id"; + $joins .= " INNER JOIN {$wpdb->term_relationships} {$alias_tr} ON tr.object_id = {$alias_tr}.object_id"; $joins .= " INNER JOIN {$wpdb->term_taxonomy} {$alias_tt} ON {$alias_tr}.term_taxonomy_id = {$alias_tt}.term_taxonomy_id"; // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared @@ -220,7 +222,7 @@ public static function get_batch_term_counts( $taxonomy_slug, $date_context = ar $alias_tr = "cross_tr_{$join_index}"; $alias_tt = "cross_tt_{$join_index}"; - $joins .= " INNER JOIN {$wpdb->term_relationships} {$alias_tr} ON p.ID = {$alias_tr}.object_id"; + $joins .= " INNER JOIN {$wpdb->term_relationships} {$alias_tr} ON tr.object_id = {$alias_tr}.object_id"; $joins .= " INNER JOIN {$wpdb->term_taxonomy} {$alias_tt} ON {$alias_tr}.term_taxonomy_id = {$alias_tt}.term_taxonomy_id"; // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared @@ -231,22 +233,42 @@ public static function get_batch_term_counts( $taxonomy_slug, $date_context = ar ++$join_index; } - // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared - $query = $wpdb->prepare( - "SELECT tt.term_id, 
COUNT(DISTINCT tr.object_id) as event_count - FROM {$wpdb->term_relationships} tr - INNER JOIN {$wpdb->term_taxonomy} tt - ON tr.term_taxonomy_id = tt.term_taxonomy_id - INNER JOIN {$wpdb->posts} p - ON tr.object_id = p.ID - {$joins} - WHERE tt.taxonomy = %s - AND p.post_type = %s - AND p.post_status = 'publish' - {$where_clauses} - GROUP BY tt.term_id", - $params - ); + // When a date filter is active, event_dates provides post_status filtering + // so we can skip the expensive posts table JOIN entirely. + if ( $has_date_filter ) { + // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared + $query = $wpdb->prepare( + "SELECT tt.term_id, COUNT(DISTINCT tr.object_id) as event_count + FROM {$wpdb->term_relationships} tr + INNER JOIN {$wpdb->term_taxonomy} tt + ON tr.term_taxonomy_id = tt.term_taxonomy_id + {$joins} + WHERE tt.taxonomy = %s + {$where_clauses} + GROUP BY tt.term_id", + $params + ); + } else { + // No date filter — fall back to posts JOIN for type/status filtering. + $post_type = Event_Post_Type::POST_TYPE; + array_splice( $params, 1, 0, array( $post_type ) ); + // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared + $query = $wpdb->prepare( + "SELECT tt.term_id, COUNT(DISTINCT tr.object_id) as event_count + FROM {$wpdb->term_relationships} tr + INNER JOIN {$wpdb->term_taxonomy} tt + ON tr.term_taxonomy_id = tt.term_taxonomy_id + INNER JOIN {$wpdb->posts} p + ON tr.object_id = p.ID + {$joins} + WHERE tt.taxonomy = %s + AND p.post_type = %s + AND p.post_status = 'publish' + {$where_clauses} + GROUP BY tt.term_id", + $params + ); + } // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared $results = $wpdb->get_results( $query ); diff --git a/inc/Core/EventDatesTable.php b/inc/Core/EventDatesTable.php index a04e163..2b51e47 100644 --- a/inc/Core/EventDatesTable.php +++ b/inc/Core/EventDatesTable.php @@ -6,6 +6,10 @@ * postmeta-based event datetime storage. 
Provides schema creation via * dbDelta(), backfill from postmeta, and helper read/write functions. * + * The table includes a denormalized post_status column so that queries + * can filter to published events without joining the posts table (which + * is the primary bottleneck on sites with 30K+ events). + * * @package DataMachineEvents\Core * @since 0.23.0 */ @@ -41,9 +45,11 @@ public static function create_table(): void { post_id BIGINT UNSIGNED NOT NULL, start_datetime DATETIME NOT NULL, end_datetime DATETIME DEFAULT NULL, + post_status VARCHAR(20) NOT NULL DEFAULT 'publish', PRIMARY KEY (post_id), KEY start_datetime (start_datetime), - KEY end_datetime (end_datetime) + KEY end_datetime (end_datetime), + KEY status_start (post_status, start_datetime) ) ENGINE=InnoDB {$charset};"; require_once ABSPATH . 'wp-admin/includes/upgrade.php'; @@ -67,18 +73,44 @@ public static function table_exists(): bool { * @param int $post_id Post ID. * @param string $start_datetime MySQL datetime string. * @param string|null $end_datetime MySQL datetime string or null. + * @param string|null $post_status Post status. When null, it is looked up via get_post_status() and falls back to 'publish' if that lookup returns a falsy value. */ - public static function upsert( int $post_id, string $start_datetime, ?string $end_datetime = null ): void { + public static function upsert( int $post_id, string $start_datetime, ?string $end_datetime = null, ?string $post_status = null ): void { global $wpdb; + if ( null === $post_status ) { + $post_status = get_post_status( $post_id ) ?: 'publish'; + } + $wpdb->replace( self::table_name(), array( 'post_id' => $post_id, 'start_datetime' => $start_datetime, 'end_datetime' => $end_datetime, + 'post_status' => $post_status, ), - array( '%d', '%s', $end_datetime ? '%s' : null ) + array( '%d', '%s', $end_datetime ? '%s' : null, '%s' ) + ); + } + + /** + * Update the post_status column for an event. + * + * Called from transition_post_status hook to keep denormalized status in sync. + * + * @param int $post_id Post ID. 
+ * @param string $post_status New post status. + */ + public static function update_status( int $post_id, string $post_status ): void { + global $wpdb; + + $wpdb->update( + self::table_name(), + array( 'post_status' => $post_status ), + array( 'post_id' => $post_id ), + array( '%s' ), + array( '%d' ) ); } @@ -137,8 +169,10 @@ public static function backfill( int $batch_size = 500, ?callable $progress = nu $wpdb->prepare( "SELECT pm_start.post_id, pm_start.meta_value AS start_datetime, - pm_end.meta_value AS end_datetime + pm_end.meta_value AS end_datetime, + p.post_status FROM {$wpdb->postmeta} pm_start + INNER JOIN {$wpdb->posts} p ON pm_start.post_id = p.ID LEFT JOIN {$table} ed ON pm_start.post_id = ed.post_id LEFT JOIN {$wpdb->postmeta} pm_end ON pm_start.post_id = pm_end.post_id @@ -158,7 +192,8 @@ public static function backfill( int $batch_size = 500, ?callable $progress = nu self::upsert( (int) $row->post_id, $row->start_datetime, - $row->end_datetime ?: null + $row->end_datetime ?: null, + $row->post_status ); ++$total; } diff --git a/inc/Core/meta-storage.php b/inc/Core/meta-storage.php index b5161aa..04225a9 100644 --- a/inc/Core/meta-storage.php +++ b/inc/Core/meta-storage.php @@ -249,6 +249,29 @@ function data_machine_events_sync_datetime_meta( $post_id, $post, $update ) { } add_action( 'save_post', __NAMESPACE__ . '\\data_machine_events_sync_datetime_meta', 10, 3 ); +/** + * Sync post_status to event_dates table on status transitions. + * + * Keeps the denormalized post_status column in sync so that date queries + * can filter by status without joining the full posts table. + * + * @param string $new_status New post status. + * @param string $old_status Old post status. + * @param WP_Post $post Post object. 
+ */ +function data_machine_events_sync_status( $new_status, $old_status, $post ) { + if ( Event_Post_Type::POST_TYPE !== $post->post_type ) { + return; + } + + if ( $new_status === $old_status ) { + return; + } + + EventDatesTable::update_status( $post->ID, $new_status ); +} +add_action( 'transition_post_status', __NAMESPACE__ . '\\data_machine_events_sync_status', 10, 3 ); + /** * Get event dates from the dedicated event_dates table. *