From 29ee99d3b22fc27eeb8c24ebbf7ea4bd00545945 Mon Sep 17 00:00:00 2001 From: Charles Paul Date: Mon, 22 Dec 2025 12:13:19 -0500 Subject: [PATCH] fix(preprocessing): get merged group_ids after intersections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We support users merging / unmerging / reprocessing their groups. Each of these operations requires the group's occurrences to point to the new group ID. Clickhouse-based data stores are immutable-ish, so we do that through the power of (1) tracking Group Redirects on one of these move-events, and (2) preprocessing queries to expand group_id filters to include all the relevant groups. Currently, the preprocessing apparatus aims for _correctness_ — it gets all merged group_ids for each filter before intersecting them. This is technically correct, but (1) unlikely to be important (since users usually just use the newest group_id) and (2) breaks end-user expectations (since users would expect a filter like `g_id = 1 AND g_id = 2` to generally return no results, even if one group was merged into the other). This PR changes the behavior so that we run all the intersections before expanding through redirects. This has two main benefits: * Intersections will necessarily have equal-or-fewer group_ids than each filter. Passing fewer IDs into the expansion will be faster and make us less likely to hit the threshold (& miss results). * Running the expansion at most twice, instead of once per filter, means that we'll cut down on the number of expensive DB queries, which should speed things up. 
--- src/sentry/utils/snuba.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sentry/utils/snuba.py b/src/sentry/utils/snuba.py index d5f63b1c7f6397..0059eba9bc836a 100644 --- a/src/sentry/utils/snuba.py +++ b/src/sentry/utils/snuba.py @@ -940,7 +940,7 @@ def _preprocess_group_id_redirects(self): out_groups: set[int | str] = set() if "group_id" in self.filter_keys: self.filter_keys = self.filter_keys.copy() - in_groups = get_all_merged_group_ids(self.filter_keys["group_id"]) + in_groups = set(self.filter_keys["group_id"]) del self.filter_keys["group_id"] new_conditions = [] @@ -953,12 +953,12 @@ def _preprocess_group_id_redirects(self): op = triple[1] # IN statements need to intersect if op == "IN": - new_in_groups = get_all_merged_group_ids(triple[2]) + new_in_groups = set(triple[2]) if in_groups is not None: new_in_groups = in_groups.intersection(new_in_groups) in_groups = new_in_groups elif op == "=": - new_in_groups = get_all_merged_group_ids([triple[2]]) + new_in_groups = {triple[2]} if in_groups is not None: new_in_groups = in_groups.intersection(new_in_groups) in_groups = new_in_groups @@ -968,13 +968,13 @@ def _preprocess_group_id_redirects(self): elif op == "!=": out_groups.add(triple[2]) - out_groups = get_all_merged_group_ids(list(out_groups)) + out_groups = get_all_merged_group_ids(out_groups) triple = None # If there is an "IN" statement, we don't need a "NOT IN" statement. We can # just subtract the NOT IN groups from the IN groups. if in_groups is not None: in_groups.difference_update(out_groups) - triple = ["group_id", "IN", in_groups] + triple = ["group_id", "IN", get_all_merged_group_ids(in_groups)] elif len(out_groups) > 0: triple = ["group_id", "NOT IN", out_groups]