From 835e0fbe53e558af0b06faf4c879c9b6ea61eb43 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 2 Oct 2025 10:31:13 +0200 Subject: [PATCH 01/49] ConversationStore.Migration: Implement function to migrate a conversation --- .../src/Wire/API/Conversation/Protocol.hs | 6 + .../src/Wire/ConversationStore/Migration.hs | 296 ++++++++++++++++++ .../Wire/ConversationStore/MigrationLock.hs | 88 ++++++ .../src/Wire/ConversationStore/Postgres.hs | 15 + .../src/Wire/PostgresMigrations.hs | 1 + libs/wire-subsystems/wire-subsystems.cabal | 2 + services/galley/galley.integration.yaml | 1 + services/galley/src/Galley/API/Util.hs | 8 +- 8 files changed, 410 insertions(+), 7 deletions(-) create mode 100644 libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs create mode 100644 libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs diff --git a/libs/wire-api/src/Wire/API/Conversation/Protocol.hs b/libs/wire-api/src/Wire/API/Conversation/Protocol.hs index f2eeaebefc..221b86bc73 100644 --- a/libs/wire-api/src/Wire/API/Conversation/Protocol.hs +++ b/libs/wire-api/src/Wire/API/Conversation/Protocol.hs @@ -34,6 +34,7 @@ module Wire.API.Conversation.Protocol cnvmlsEpoch, ProtocolUpdate (..), getGroupId, + getMLSData, ) where @@ -282,3 +283,8 @@ getGroupId :: Protocol -> Maybe GroupId getGroupId (ProtocolMLS mlsData) = Just $ cnvmlsGroupId mlsData getGroupId (ProtocolMixed mlsData) = Just $ cnvmlsGroupId mlsData getGroupId _ = Nothing + +getMLSData :: Protocol -> Maybe ConversationMLSData +getMLSData (ProtocolMLS mlsData) = Just mlsData +getMLSData (ProtocolMixed mlsData) = Just mlsData +getMLSData ProtocolProteus = Nothing diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs new file mode 100644 index 0000000000..d1a07f6a61 --- /dev/null +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -0,0 +1,296 @@ +{-# LANGUAGE RecordWildCards #-} + +module Wire.ConversationStore.Migration where + +import Cassandra (ClientState) +import Data.Bits +import Data.Domain +import Data.Id +import Data.IntMap qualified as IntMap +import Data.Map qualified as Map +import Data.Qualified +import Data.Time +import Data.Tuple.Extra +import Data.UUID qualified as UUID +import Data.Vector (Vector) +import Data.Vector qualified as Vector +import Hasql.Pool qualified as Hasql +import Hasql.Session qualified as Session +import Hasql.Statement qualified as Hasql +import Hasql.TH +import Hasql.Transaction qualified as Transaction +import Hasql.Transaction.Sessions +import Imports +import Polysemy +import Polysemy.Async +import Polysemy.Error +import Polysemy.Input +import Polysemy.TinyLog +import Wire.API.Conversation hiding (Member) +import Wire.API.Conversation.Protocol +import Wire.API.Conversation.Role +import Wire.API.MLS.CipherSuite +import Wire.API.MLS.Credential +import Wire.API.MLS.GroupInfo +import Wire.API.MLS.LeafNode +import Wire.API.MLS.SubConversation +import Wire.API.PostgresMarshall +import Wire.API.Provider.Service +import Wire.ConversationStore +import Wire.ConversationStore.Cassandra (interpretConversationStoreToCassandra) +import Wire.ConversationStore.MLS.Types +import Wire.ConversationStore.MigrationLock +import Wire.ConversationStore.Postgres +import Wire.Postgres (runTransaction) +import Wire.StoredConversation + +migrateAllConversations :: Sem r () +migrateAllConversations = undefined + +migrateConversation :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r) => 
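+  -- Migration flow, as implemented below: take the exclusive advisory lock
+  -- for this conversation, copy all of its data out of Cassandra, write it
+  -- to Postgres, and only then delete the Cassandra rows; per the conflict
+  -- handling noted in 'saveConvToPostgres', rows already in Postgres win.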
ConvId -> Sem r () +migrateConversation cid = do + void . withMigrationLock LockExclusive (Left cid) $ do + mConvData <- getConvFromCassandra cid + for_ mConvData $ \convData -> do + saveConvToPostgres convData + deleteConvFromCassandra convData + +data ConvMLSDetails = ConvMLSDetails + { groupInfoData :: GroupInfoData, + clientMap :: ClientMap LeafIndex, + indexMap :: IndexMap + } + +data AllSubConvData = AllSubConvData + { subConv :: SubConversation, + groupInfoData :: Maybe GroupInfoData + } + +data AllConvData = AllConvData + { conv :: StoredConversation, + mlsDetails :: Maybe ConvMLSDetails, + subConvs :: [AllSubConvData] + } + +getConvFromCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => ConvId -> Sem r (Maybe AllConvData) +getConvFromCassandra cid = withCassandra $ do + getConversation cid >>= \case + Nothing -> pure Nothing + Just conv -> do + subConvMlsData <- listSubConversations cid + mGroupInfo <- getGroupInfo cid + mlsLeafIndices <- case mlsMetadata conv of + Nothing -> pure Nothing + Just (mlsData, _) -> do + (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId + pure $ Just (cm, im) + let mlsDetails = ConvMLSDetails <$> mGroupInfo <*> fmap fst mlsLeafIndices <*> fmap snd mlsLeafIndices + subConvs <- fmap Map.elems $ flip Map.traverseWithKey subConvMlsData $ \subConvId mlsData -> do + (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId + let subconv = + SubConversation + { scParentConvId = cid, + scSubConvId = subConvId, + scMLSData = mlsData, + scMembers = cm, + scIndexMap = im + } + gi <- getSubConversationGroupInfo cid subConvId + pure $ AllSubConvData subconv gi + pure . Just $ AllConvData {..} + +deleteConvFromCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => AllConvData -> Sem r () +deleteConvFromCassandra allConvData = withCassandra $ do + for_ allConvData.subConvs $ \subConvData -> do + removeAllMLSClients subConvData.subConv.scMLSData.cnvmlsGroupId + deleteSubConversation allConvData.conv.id_ subConvData.subConv.scSubConvId + + for_ (getMLSData allConvData.conv.protocol) $ \mlsData -> + removeAllMLSClients mlsData.cnvmlsGroupId + + case allConvData.conv.metadata.cnvmTeam of + Nothing -> deleteConversation allConvData.conv.id_ + Just tid -> deleteTeamConversation tid allConvData.conv.id_ + +saveConvToPostgres :: (PGConstraints r) => AllConvData -> Sem r () +saveConvToPostgres allConvData = do + let meta = storedConv.metadata + mMlsData = getMLSData storedConv.protocol + mActiveMLSData = cnvmlsActiveData =<< mMlsData + convRow = + ( storedConv.id_, + meta.cnvmType, + meta.cnvmCreator, + Vector.fromList meta.cnvmAccess, + meta.cnvmAccessRoles, + meta.cnvmName, + meta.cnvmTeam, + meta.cnvmMessageTimer, + meta.cnvmReceiptMode, + protocolTag storedConv.protocol, + getGroupId storedConv.protocol, + (.epoch) <$> mActiveMLSData, + epochTimestamp <$> mActiveMLSData, + ciphersuite <$> mActiveMLSData, + (.groupInfoData) <$> allConvData.mlsDetails, + meta.cnvmGroupConvType, + meta.cnvmChannelAddPermission, + meta.cnvmCellsState, + meta.cnvmParent + ) + runTransaction ReadCommitted Write $ do + Transaction.statement convRow insertConv + Transaction.statement localMemberColumns insertLocalMembers + Transaction.statement remoteMemberColumns insertRemoteMembers + Transaction.statement subConvColumns insertSubConvs + Transaction.statement mlsClientColumns insertMLSClients + where + storedConv = allConvData.conv + -- In all these queries we do nothing on conflict because if the data is in + -- 
Postgres it is considered fresher and data from Cassandra is ignored. + insertConv = + lmapPG @_ @(_, _, _, Vector Int32, Vector Int32, _, _, _, _, _, _, _, _, _, _, _, _, _, _) + [resultlessStatement|INSERT INTO conversation + (id, type, creator, access, access_roles_v2, + name, team, message_timer, receipt_mode, protocol, + group_id, epoch, epoch_timestamp, cipher_suite, group_conv_type, + channel_add_permission, cells_state, parent_conv) + VALUES + ($1 :: uuid, $2 :: integer, $3 :: uuid?, $4 :: integer[], $5 :: integer[], + $6 :: text?, $7 :: uuid?, $8 :: bigint?, $9 :: integer?, $10 :: integer, + $11 :: bytea?, $12 :: bigint?, $13 :: timestamptz?, $14 :: integer?, $15 :: bytea?, + $16 ::integer?, $17 :: integer?, $18 :: integer, $19 :: uuid?) + ON CONFLICT (id) DO NOTHING + |] + + localMemberColumns :: + ( [ConvId], + [UserId], + [Maybe ServiceId], + [Maybe ProviderId], + [Maybe MutedStatus], + [Maybe Text], + [Bool], + [Maybe Text], + [Bool], + [Maybe Text], + [RoleName] + ) + localMemberColumns = + let mems = storedConv.localMembers + in ( replicate (length mems) storedConv.id_, + map (.id_) mems, + map (fmap (._serviceRefId) . (.service)) mems, + map (fmap (._serviceRefProvider) . (.service)) mems, + map (.status.msOtrMutedStatus) mems, + map (.status.msOtrMutedRef) mems, + map (.status.msOtrArchived) mems, + map (.status.msOtrArchivedRef) mems, + map (.status.msHidden) mems, + map (.status.msHiddenRef) mems, + map (.convRoleName) mems + ) + + remoteMemberColumns :: ([ConvId], [Domain], [UserId], [RoleName]) + remoteMemberColumns = + ( replicate (length storedConv.remoteMembers) storedConv.id_, + map (tDomain . (.id_)) storedConv.remoteMembers, + map (tUnqualified . (.id_)) storedConv.remoteMembers, + map (.convRoleName) storedConv.remoteMembers + ) + + insertLocalMembers :: + Hasql.Statement + ( [ConvId], + [UserId], + [Maybe ServiceId], + [Maybe ProviderId], + [Maybe MutedStatus], + [Maybe Text], + [Bool], + [Maybe Text], + [Bool], + [Maybe Text], + [RoleName] + ) + () + insertLocalMembers = + lmapPG @_ @(Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _) + [resultlessStatement|INSERT INTO conversation_member + (conv, "user", service, provider, otr_muted_status, otr_muted_ref, + otr_archived, otr_archived_ref, hidden, hidden_ref, conversation_role) + SELECT * + FROM UNNEST ($1 :: uuid[], $2 :: uuid[], $3 :: uuid?[], $4 :: uuid?[], + $5 :: integer?[], $6 :: text?[], $7 :: boolean[], $8 :: text?[], + $9 :: boolean[], $10 :: text?[], $11 :: text[]) + ON CONFLICT (conv, "user") DO NOTHING + |] + insertRemoteMembers :: Hasql.Statement ([ConvId], [Domain], [UserId], [RoleName]) () + insertRemoteMembers = + lmapPG @_ @(Vector _, Vector _, Vector _, Vector _) + [resultlessStatement|INSERT INTO local_conversation_remote_member + (conv, user_remote_domain, user_remote_id, conversation_role) + SELECT * FROM UNNEST($1 :: uuid[], $2 :: text[], $3 :: uuid[], $4 :: text[]) + |] + + mlsClientRows :: GroupId -> ClientMap LeafIndex -> IndexMap -> [(GroupId, Domain, UserId, ClientId, Int32, Bool)] + mlsClientRows gid clientMap indexMap = + let clients :: [(LeafIndex, ClientIdentity, Bool)] = + IntMap.elems $ + IntMap.mapWithKey + (\idx ci -> (fromIntegral idx, ci, isNothing (cmLookupIndex ci clientMap))) + indexMap.unIndexMap + in flip map clients $ \(idx, ci, removalPending) -> + (gid, ci.ciDomain, ci.ciUser, ci.ciClient, fromIntegral idx, removalPending) + + mlsClientColumns :: ([GroupId], [Domain], [UserId], [ClientId], [Int32], [Bool]) + 
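+    -- Flattens the clients of the parent conversation and of every
+    -- subconversation into parallel column lists for the UNNEST-based bulk
+    -- insert in 'insertMLSClients'; a client counts as removal-pending when
+    -- it still has a leaf index but no longer appears in the client map.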
mlsClientColumns = + let mainConvGroupId = cnvmlsGroupId <$> getMLSData storedConv.protocol + mainConvInputs = maybeToList $ (,,) <$> mainConvGroupId <*> (fmap (.clientMap) allConvData.mlsDetails) <*> (fmap (.indexMap) allConvData.mlsDetails) + subConvsInputs = flip map allConvData.subConvs $ \(AllSubConvData sc _) -> (sc.scMLSData.cnvmlsGroupId, sc.scMembers, sc.scIndexMap) + allInputs = mainConvInputs <> subConvsInputs + allRows = concatMap (uncurry3 mlsClientRows) allInputs + in unzip6 allRows + + insertMLSClients :: Hasql.Statement ([GroupId], [Domain], [UserId], [ClientId], [Int32], [Bool]) () + insertMLSClients = + lmapPG @_ @(Vector _, Vector _, Vector _, Vector _, Vector _, Vector _) + [resultlessStatement|INSERT INTO mls_group_member_client + (group_id, user_domain, "user", client, leaf_node_index, removal_pending) + SELECT * + FROM UNNEST ($1 :: bytea[], $2 :: text[], $3 :: uuid[], + $4 :: text[], $5 :: integer[], $6 :: bool[]) + |] + + subConvRows :: [(ConvId, SubConvId, Maybe CipherSuiteTag, Maybe Epoch, Maybe UTCTime, GroupId, Maybe GroupInfoData)] + subConvRows = + flip map allConvData.subConvs $ \scData -> + ( storedConv.id_, + scData.subConv.scSubConvId, + (.ciphersuite) <$> scData.subConv.scMLSData.cnvmlsActiveData, + (.epoch) <$> scData.subConv.scMLSData.cnvmlsActiveData, + (.epochTimestamp) <$> scData.subConv.scMLSData.cnvmlsActiveData, + scData.subConv.scMLSData.cnvmlsGroupId, + scData.groupInfoData + ) + + subConvColumns :: ([ConvId], [SubConvId], [Maybe CipherSuiteTag], [Maybe Epoch], [Maybe UTCTime], [GroupId], [Maybe GroupInfoData]) + subConvColumns = unzip7 subConvRows + + insertSubConvs :: Hasql.Statement ([ConvId], [SubConvId], [Maybe CipherSuiteTag], [Maybe Epoch], [Maybe UTCTime], [GroupId], [Maybe GroupInfoData]) () + insertSubConvs = + lmapPG @_ @(Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _) + [resultlessStatement|INSERT INTO subconversation + (conv_id, subconv_id, cipher_suite, epoch, epoch_timestamp, group_id, public_group_state) + SELECT * + FROM UNNEST ($1 :: uuid[], $2 :: text[], $3 :: integer?[], + $4 :: bigint?[], $5 :: timestamptz?[], $6 :: bytea[], $7 :: bytea?[]) + |] + +deleteFromCassandra :: StoredConversation -> [SubConversation] -> Sem r () +deleteFromCassandra = undefined + +withCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => InterpreterFor ConversationStore r +withCassandra action = do + cstate <- input + interpretConversationStoreToCassandra cstate action diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs new file mode 100644 index 0000000000..0f05b0ee40 --- /dev/null +++ b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs @@ -0,0 +1,88 @@ +module Wire.ConversationStore.MigrationLock where + +import Data.Bits +import Data.Id +import Data.UUID qualified as UUID +import Hasql.Pool qualified as Hasql +import Hasql.Session qualified as Session +import Hasql.Statement qualified as Hasql +import Hasql.TH +import Imports +import Polysemy +import Polysemy.Async +import Polysemy.Input +import Polysemy.TinyLog (TinyLog) +import Polysemy.TinyLog qualified as TinyLog +import System.Logger.Message qualified as Log +import Wire.ConversationStore.Postgres + +data LockType + = -- | Used for migrating a conversation, will block any other locks + LockExclusive + | -- | Used for reading and writing to Cassandra, will block exclusive locks + LockShared + +withMigrationLock :: + ( PGConstraints 
r, + Member Async r, + Member TinyLog r + ) => + LockType -> + Either ConvId UserId -> + Sem r a -> + Sem r a +withMigrationLock lockType convOrUser action = do + lockAcquired <- embed newEmptyMVar + actionCompleted <- embed newEmptyMVar + + pool <- input + lockThread <- async . embed . Hasql.use pool $ do + _ <- Session.statement lockId acquireLock + liftIO $ putMVar lockAcquired () + liftIO $ takeMVar actionCompleted + Session.statement lockId releaseLock + + -- TODO: We should time this out and log in case the lock is taken by another + -- process which gets stuck + embed $ takeMVar lockAcquired + res <- action + embed $ putMVar actionCompleted () + + -- TODO: Do we need a timeout here? + mEithErr <- await lockThread + case mEithErr of + Just (Right ()) -> pure () + Just (Left e) -> + TinyLog.warn $ + Log.msg (Log.val "Failed to cleanly unlock the migration lock") + . Log.field (either (const "conv") (const "user") convOrUser) (either idToText idToText convOrUser) + . Log.field "error" (show e) + Nothing -> + TinyLog.warn $ + Log.msg (Log.val "Failed to cleanly unlock the migration lock") + . Log.field (either (const "conv") (const "user") convOrUser) (either idToText idToText convOrUser) + . Log.field "error" ("N/A" :: ByteString) + pure res + where + lockId :: Int64 + lockId = fromIntegral $ case convOrUser of + Left convId -> hashUUID convId + Right userId -> hashUUID userId + + hashUUID :: Id a -> Int64 + hashUUID (toUUID -> uuid) = + let (w1, w2) = UUID.toWords64 uuid + mixed = w1 `xor` (w2 `shiftR` 32) `xor` (w2 `shiftL` 32) + in fromIntegral mixed + + acquireLock :: Hasql.Statement (Int64) () + acquireLock = + case lockType of + LockExclusive -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_lock($1 :: bigint))|] + LockShared -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_lock_shared($1 :: bigint))|] + + releaseLock :: Hasql.Statement (Int64) () + releaseLock = + case lockType of + LockExclusive -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_unlock($1 :: bigint))|] + LockShared -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_unlock_shared($1 :: bigint))|] diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs index b6be39d877..574ea0241c 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs @@ -784,6 +784,21 @@ mkLocalMember (cid, uid, mServiceId, mProviderId, msOtrMutedStatus, msOtrMutedRe } ) +mkLocalMemberRow :: ConvId -> LocalMember -> LocalMemberRow +mkLocalMemberRow cid lm = + ( cid, + lm.id_, + _serviceRefId <$> lm.service, + _serviceRefProvider <$> lm.service, + lm.status.msOtrMutedStatus, + lm.status.msOtrMutedRef, + Just lm.status.msOtrArchived, + lm.status.msOtrArchivedRef, + Just lm.status.msHidden, + lm.status.msHiddenRef, + Just lm.convRoleName + ) + type RemoteMemberRow = (ConvId, Domain, UserId, RoleName) getRemoteMemberImpl :: (PGConstraints r) => ConvId -> Remote UserId -> Sem r (Maybe RemoteMember) diff --git a/libs/wire-subsystems/src/Wire/PostgresMigrations.hs b/libs/wire-subsystems/src/Wire/PostgresMigrations.hs index ba5452b3e8..d723a9f96b 100644 --- a/libs/wire-subsystems/src/Wire/PostgresMigrations.hs +++ b/libs/wire-subsystems/src/Wire/PostgresMigrations.hs @@ -1,4 +1,5 @@ {-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fforce-recomp #-} module Wire.PostgresMigrations where diff --git 
a/libs/wire-subsystems/wire-subsystems.cabal b/libs/wire-subsystems/wire-subsystems.cabal
index a22d24a034..42ec6bf8e3 100644
--- a/libs/wire-subsystems/wire-subsystems.cabal
+++ b/libs/wire-subsystems/wire-subsystems.cabal
@@ -190,6 +190,8 @@ library
     Wire.ConversationStore.Cassandra
     Wire.ConversationStore.Cassandra.Instances
     Wire.ConversationStore.Cassandra.Queries
+    Wire.ConversationStore.Migration
+    Wire.ConversationStore.MigrationLock
     Wire.ConversationStore.MLS.Types
     Wire.ConversationStore.Postgres
     Wire.DeleteQueue
diff --git a/services/galley/galley.integration.yaml b/services/galley/galley.integration.yaml
index f5b43df220..5cc259464b 100644
--- a/services/galley/galley.integration.yaml
+++ b/services/galley/galley.integration.yaml
@@ -196,3 +196,4 @@ journal: # if set, journals; if not set, disables journaling
 
 postgresMigration:
   conversation: postgresql
+  # conversation: cassandra
diff --git a/services/galley/src/Galley/API/Util.hs b/services/galley/src/Galley/API/Util.hs
index 545ac8a6a6..235acd3beb 100644
--- a/services/galley/src/Galley/API/Util.hs
+++ b/services/galley/src/Galley/API/Util.hs
@@ -303,7 +303,7 @@ checkGroupIdSupport ::
   Sem r ()
 checkGroupIdSupport loc conv joinAction = void $ runMaybeT $ do
   -- if it is an MLS conversation
-  d <- MaybeT (pure (getMLSData conv))
+  d <- MaybeT (pure (getMLSData conv.protocol))
 
   -- if the group ID version is not 1
   (v, _) <-
@@ -322,12 +322,6 @@ checkGroupIdSupport loc conv joinAction = void $ runMaybeT $ do
     failOnFirstError :: (Member (ErrorS GroupIdVersionNotSupported) r) => [Either e x] -> Sem r ()
     failOnFirstError = traverse_ $ either (\_ -> throwS @GroupIdVersionNotSupported) pure
 
-getMLSData :: StoredConversation -> Maybe ConversationMLSData
-getMLSData conv = case conv.protocol of
-  ProtocolMLS d -> Just d
-  ProtocolMixed d -> Just d
-  ProtocolProteus -> Nothing
-
 -- | Same as 'permissionCheck', but for a statically known permission.
 permissionCheckS ::
   forall teamAssociation perm (p :: perm) r.

From 4a3418ab6ea377551e12faed8f4969cf8353d782 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 9 Oct 2025 16:50:09 +0200
Subject: [PATCH 02/49] ConversationStore.Cassandra: Add hybrid interpreter,
 which will work during migration

Exceptions:
1. Getting a paginated list of qualified conv Ids
2. It will not work well if the migration fails to delete conv data from
   Cassandra after copying it to Postgres.

These problems will be solved in the following commits.
---
 .../src/Wire/Sem/Paging/Cassandra.hs          |   4 +-
 libs/wire-subsystems/default.nix              |   3 +
 .../src/Wire/ConversationStore.hs             |   2 +
 .../src/Wire/ConversationStore/Cassandra.hs   | 441 ++++++++++++++++++
 .../src/Wire/ConversationStore/Migration.hs   |  10 +-
 .../Wire/ConversationStore/MigrationLock.hs   | 103 ++--
 .../src/Wire/ConversationStore/Postgres.hs    |  13 +
 libs/wire-subsystems/wire-subsystems.cabal    |   1 +
 8 files changed, 535 insertions(+), 42 deletions(-)

diff --git a/libs/polysemy-wire-zoo/src/Wire/Sem/Paging/Cassandra.hs b/libs/polysemy-wire-zoo/src/Wire/Sem/Paging/Cassandra.hs
index 9958cab2d6..12210c3c8a 100644
--- a/libs/polysemy-wire-zoo/src/Wire/Sem/Paging/Cassandra.hs
+++ b/libs/polysemy-wire-zoo/src/Wire/Sem/Paging/Cassandra.hs
@@ -23,11 +23,9 @@ module Wire.Sem.Paging.Cassandra
     InternalPagingState (..),
     mkInternalPage,
     ipNext,
-    ResultSet,
+    ResultSet (..),
     mkResultSet,
     mkResultSetByLength,
-    resultSetResult,
-    resultSetType,
     ResultSetType (..),
   )
 where
diff --git a/libs/wire-subsystems/default.nix b/libs/wire-subsystems/default.nix
index 498d21aa2f..167aee9899 100644
--- a/libs/wire-subsystems/default.nix
+++ b/libs/wire-subsystems/default.nix
@@ -65,6 +65,7 @@
 , network
 , network-conduit-tls
 , polysemy
+, polysemy-conc
 , polysemy-plugin
 , polysemy-time
 , polysemy-wire-zoo
@@ -175,6 +176,7 @@ mkDerivation {
     network
     network-conduit-tls
     polysemy
+    polysemy-conc
     polysemy-plugin
     polysemy-time
     polysemy-wire-zoo
@@ -274,6 +276,7 @@ mkDerivation {
     network
     network-conduit-tls
     polysemy
+    polysemy-conc
     polysemy-plugin
     polysemy-time
     polysemy-wire-zoo
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore.hs b/libs/wire-subsystems/src/Wire/ConversationStore.hs
index 0bd1191278..2eb73088b7 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore.hs
@@ -134,6 +134,8 @@ data ConversationStore m a where
   ListSubConversations :: ConvId -> ConversationStore m (Map SubConvId ConversationMLSData)
   DeleteSubConversation :: ConvId -> SubConvId -> ConversationStore m ()
   SearchConversations :: ConversationSearch -> ConversationStore m [ConversationSearchResult]
+  -- FOR MIGRATION
+  HaveRemoteConvs :: [UserId] -> ConversationStore m [UserId]
 
 makeSem ''ConversationStore
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index 22ae8ca1a7..11570aef6f 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -18,6 +18,8 @@ module Wire.ConversationStore.Cassandra
   ( interpretMLSCommitLockStoreToCassandra,
     interpretConversationStoreToCassandra,
+    interpretConversationStoreToCassandraAndPostgres,
+    MigrationError (..),
   )
 where
@@ -43,7 +45,11 @@ import Data.Set qualified as Set
 import Data.Time
 import Imports
 import Polysemy
+import Polysemy.Async (Async)
+import Polysemy.Conc
 import Polysemy.Embed
+import Polysemy.Error (Error, throw)
+import Polysemy.Time
 import Polysemy.TinyLog
 import System.Logger qualified as Log
 import UnliftIO qualified
@@ -53,16 +59,20 @@ import Wire.API.Conversation.Protocol
 import Wire.API.Conversation.Role hiding (DeleteConversation)
 import Wire.API.MLS.CipherSuite
 import Wire.API.MLS.Credential
+import Wire.API.MLS.Group.Serialisation
 import Wire.API.MLS.GroupInfo
 import Wire.API.MLS.LeafNode
(LeafIndex) import Wire.API.MLS.SubConversation import Wire.API.Provider.Service import Wire.API.Routes.MultiTablePaging import Wire.ConversationStore (ConversationStore (..), LockAcquired (..), MLSCommitLockStore (..)) +import Wire.ConversationStore qualified as ConvStore import Wire.ConversationStore.Cassandra.Instances () import Wire.ConversationStore.Cassandra.Queries qualified as Cql import Wire.ConversationStore.Cassandra.Queries qualified as Queries import Wire.ConversationStore.MLS.Types +import Wire.ConversationStore.MigrationLock +import Wire.ConversationStore.Postgres (PGConstraints, interpretConversationStoreToPostgres) import Wire.Sem.Paging.Cassandra import Wire.StoredConversation import Wire.StoredConversation qualified as StoreConv @@ -714,6 +724,17 @@ lookupLocalMemberRemoteConv uid (tUntagged -> Qualified conv dom) = x5 (query1 Cql.selectRemoteConvMembers (params LocalQuorum (uid, dom, conv))) +haveRemoteConvs :: [UserId] -> Client [UserId] +haveRemoteConvs uids = + catMaybes <$> UnliftIO.pooledMapConcurrentlyN 16 runSelect uids + where + selectUserFromRemoteConv :: PrepQuery R (Identity UserId) (Identity UserId) + selectUserFromRemoteConv = "select user from user_remote_conv where user = ? limit 1" + + runSelect :: UserId -> Client (Maybe UserId) + runSelect uid = + runIdentity <$$> retry x5 (query1 selectUserFromRemoteConv (params LocalQuorum (Identity uid))) + removeLocalMembersFromRemoteConv :: -- | The conversation to remove members from Remote ConvId -> @@ -1063,3 +1084,423 @@ interpretConversationStoreToCassandra client = interpret $ \case SearchConversations _ -> do logEffect "ConversationStore.SearchConversations" pure [] + HaveRemoteConvs uids -> + runEmbedded (runClient client) $ embed $ haveRemoteConvs uids + +interpretConversationStoreToCassandraAndPostgres :: + forall r a. 
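+  -- Dispatch policy of this hybrid interpreter, as implemented below: reads
+  -- take a shared migration lock, check whether the conversation (or the
+  -- user's remote memberships) already lives in Postgres, and fall back to
+  -- Cassandra otherwise; deletions are issued against both stores; listings
+  -- read Cassandra before Postgres so a row migrated mid-query is not missed.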
+ ( Member TinyLog r, + PGConstraints r, + Member Async r, + Member (Error MigrationError) r, + Member (Error MigrationLockError) r, + Member Race r + ) => + ClientState -> + Sem (ConversationStore ': r) a -> + Sem r a +interpretConversationStoreToCassandraAndPostgres client = interpret $ \case + UpsertConversation lcnv nc -> do + withMigrationLock LockShared (Left $ tUnqualified lcnv) $ + embedClient client (getConversation (tUnqualified lcnv)) >>= \case + Nothing -> interpretConversationStoreToPostgres $ ConvStore.upsertConversation lcnv nc + Just _ -> embedClient client $ createConversation lcnv nc + GetConversation cid -> do + logEffect "ConversationStore.GetConversation" + withMigrationLock LockShared (Left cid) $ + getConvWithPostgres cid >>= \case + Nothing -> embedClient client (getConversation cid) + conv -> pure conv + GetConversationEpoch cid -> do + logEffect "ConversationStore.GetConversationEpoch" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client (getConvEpoch cid) + True -> interpretConversationStoreToPostgres $ ConvStore.getConversationEpoch cid + GetConversations cids -> do + logEffect "ConversationStore.GetConversations" + let indexByConvId = foldr (\storedConv -> Map.insert storedConv.id_ storedConv) Map.empty + -- Important to read Cassandra first, otherwise we could miss a conv which + -- got migrated while we were reading Postgres + cassConvs <- indexByConvId <$> localConversations client cids + pgConvs <- indexByConvId <$> interpretConversationStoreToPostgres (ConvStore.getConversations cids) + pure $ mapMaybe (\cid -> Map.lookup cid pgConvs <|> Map.lookup cid cassConvs) cids + GetLocalConversationIds uid start maxIds -> do + logEffect "ConversationStore.GetLocalConversationIds" + + -- Important to read Cassandra first, otherwise we could miss a conv which + -- got migrated while we were reading Postgres + cassConvIds <- embedClient client $ getLocalConvIds uid start maxIds + pgConvIds <- interpretConversationStoreToPostgres $ ConvStore.getLocalConversationIds uid start maxIds + + let allResults = List.nubOrd (pgConvIds.resultSetResult <> cassConvIds.resultSetResult) + maxIdsInt = (fromIntegral $ fromRange maxIds) + pure $ + ResultSet + { resultSetResult = take maxIdsInt allResults, + resultSetType = + if cassConvIds.resultSetType == ResultSetTruncated + || pgConvIds.resultSetType == ResultSetTruncated + || length allResults > maxIdsInt + then ResultSetTruncated + else ResultSetComplete + } + GetConversationIds uid maxIds pagingState -> do + logEffect "ConversationStore.GetConversationIds" + -- TODO: Deal with paginating across DBs + embedClient client $ getConvIds uid maxIds pagingState + GetConversationMetadata cid -> do + logEffect "ConversationStore.GetConversationMetadata" + withMigrationLock LockShared (Left cid) $ + interpretConversationStoreToPostgres (ConvStore.getConversationMetadata cid) >>= \case + Nothing -> embedClient client (conversationMeta cid) + meta -> pure meta + GetGroupInfo cid -> do + logEffect "ConversationStore.GetGroupInfo" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client (getGroupInfo cid) + True -> interpretConversationStoreToPostgres (ConvStore.getGroupInfo cid) + IsConversationAlive cid -> do + logEffect "ConversationStore.IsConversationAlive" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client (isConvAlive cid) + True -> interpretConversationStoreToPostgres 
(ConvStore.isConversationAlive cid) + SelectConversations uid cids -> do + logEffect "ConversationStore.SelectConversations" + -- TODO: Figure out what to do about convs which could be left behind in cassandra + withMigrationLocks LockShared (Seconds 2) (Left <$> cids) $ do + cassConvs <- embedClient client $ localConversationIdsOf uid cids + pgConvs <- interpretConversationStoreToPostgres $ ConvStore.selectConversations uid cids + pure $ List.nubOrd (pgConvs <> cassConvs) + GetRemoteConversationStatus uid cids -> do + logEffect "ConversationStore.GetRemoteConversationStatus" + withMigrationLock LockShared (Right uid) $ do + isUserInPostgres uid >>= \case + False -> embedClient client $ remoteConversationStatus uid cids + True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConversationStatus uid cids + SetConversationType cid ty -> do + logEffect "ConversationStore.SetConversationType" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvType cid ty + True -> interpretConversationStoreToPostgres (ConvStore.setConversationType cid ty) + SetConversationName cid value -> do + logEffect "ConversationStore.SetConversationName" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvName cid value + True -> interpretConversationStoreToPostgres (ConvStore.setConversationName cid value) + SetConversationAccess cid value -> do + logEffect "ConversationStore.SetConversationAccess" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvAccess cid value + True -> interpretConversationStoreToPostgres (ConvStore.setConversationAccess cid value) + SetConversationReceiptMode cid value -> do + logEffect "ConversationStore.SetConversationReceiptMode" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvReceiptMode cid value + True -> interpretConversationStoreToPostgres (ConvStore.setConversationReceiptMode cid value) + SetConversationMessageTimer cid value -> do + logEffect "ConversationStore.SetConversationMessageTimer" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvMessageTimer cid value + True -> interpretConversationStoreToPostgres (ConvStore.setConversationMessageTimer cid value) + SetConversationEpoch cid epoch -> do + logEffect "ConversationStore.SetConversationEpoch" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvEpoch cid epoch + True -> interpretConversationStoreToPostgres (ConvStore.setConversationEpoch cid epoch) + SetConversationCipherSuite cid cs -> do + logEffect "ConversationStore.SetConversationCipherSuite" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvCipherSuite cid cs + True -> interpretConversationStoreToPostgres (ConvStore.setConversationCipherSuite cid cs) + SetConversationCellsState cid ps -> do + logEffect "ConversationStore.SetConversationCellsState" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateConvCellsState cid ps + True -> interpretConversationStoreToPostgres (ConvStore.setConversationCellsState cid ps) + ResetConversation cid groupId -> do + logEffect "ConversationStore.ResetConversation" + withMigrationLock LockShared (Left 
cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ resetConversation cid groupId + True -> interpretConversationStoreToPostgres (ConvStore.resetConversation cid groupId) + DeleteConversation cid -> do + logEffect "ConversationStore.DeleteConversation" + withMigrationLock LockShared (Left cid) $ do + embedClient client $ deleteConversation cid + interpretConversationStoreToPostgres (ConvStore.deleteConversation cid) + SetGroupInfo cid gib -> do + logEffect "ConversationStore.SetGroupInfo" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ setGroupInfo cid gib + True -> interpretConversationStoreToPostgres (ConvStore.setGroupInfo cid gib) + UpdateToMixedProtocol cid groupId epoch -> do + logEffect "ConversationStore.UpdateToMixedProtocol" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> updateToMixedProtocol client cid groupId epoch + True -> interpretConversationStoreToPostgres (ConvStore.updateToMixedProtocol cid groupId epoch) + UpdateToMLSProtocol cid -> do + logEffect "ConversationStore.UpdateToMLSProtocol" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> updateToMLSProtocol client cid + _ -> interpretConversationStoreToPostgres (ConvStore.updateToMLSProtocol cid) + UpdateChannelAddPermissions cid cap -> do + logEffect "ConversationStore.UpdateChannelAddPermissions" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> embedClient client $ updateChannelAddPermissions cid cap + _ -> interpretConversationStoreToPostgres (ConvStore.updateChannelAddPermissions cid cap) + DeleteTeamConversation tid cid -> do + logEffect "ConversationStore.DeleteTeamConversation" + withMigrationLock LockShared (Left cid) $ do + embedClient client $ removeTeamConv tid cid + interpretConversationStoreToPostgres (ConvStore.deleteTeamConversation tid cid) + GetTeamConversation tid cid -> do + logEffect "ConversationStore.GetTeamConversation" + withMigrationLock LockShared (Left cid) $ + interpretConversationStoreToPostgres (ConvStore.getTeamConversation tid cid) >>= \case + Just foundCid -> pure $ Just foundCid + Nothing -> embedClient client $ teamConversation tid cid + GetTeamConversations tid -> do + logEffect "ConversationStore.GetTeamConversations" + -- TODO: This could return some deleted conversations if they get left + -- behind in cassandra while migration and then deleted from postgresql. + -- + -- Figure out a way to deal with this. 
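+      -- For now, results from both stores are merged and de-duplicated,
+      -- with the Postgres rows listed first.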
+ cassConvs <- embedClient client $ getTeamConversations tid + pgConvs <- interpretConversationStoreToPostgres $ ConvStore.getTeamConversations tid + pure $ List.nubOrd (pgConvs <> cassConvs) + DeleteTeamConversations tid -> do + logEffect "ConversationStore.DeleteTeamConversations" + embedClient client $ deleteTeamConversations tid + interpretConversationStoreToPostgres $ ConvStore.deleteTeamConversations tid + UpsertMembers cid ul -> do + logEffect "ConversationStore.CreateMembers" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ addMembers cid ul + _ -> interpretConversationStoreToPostgres (ConvStore.upsertMembers cid ul) + UpsertMembersInRemoteConversation rcid uids -> do + logEffect "ConversationStore.CreateMembersInRemoteConversation" + withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do + filterUsersInPostgres uids >>= \pgUids -> do + interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid pgUids + let cassUids = filter (`notElem` pgUids) uids + runEmbedded (runClient client) $ embed $ addLocalMembersToRemoteConv rcid cassUids + CreateBotMember sr bid cid -> do + logEffect "ConversationStore.CreateBotMember" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ addBotMember sr bid cid + _ -> interpretConversationStoreToPostgres (ConvStore.createBotMember sr bid cid) + GetLocalMember cid uid -> do + logEffect "ConversationStore.GetLocalMember" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ member cid uid + True -> interpretConversationStoreToPostgres (ConvStore.getLocalMember cid uid) + GetLocalMembers cid -> do + logEffect "ConversationStore.GetLocalMembers" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ members cid + True -> interpretConversationStoreToPostgres (ConvStore.getLocalMembers cid) + GetRemoteMember cid uid -> do + logEffect "ConversationStore.GetRemoteMember" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid) + True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMember cid uid) + GetRemoteMembers cid -> do + logEffect "ConversationStore.GetRemoteMembers" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ lookupRemoteMembers cid + True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMembers cid) + CheckLocalMemberRemoteConv uid rcnv -> do + logEffect "ConversationStore.CheckLocalMemberRemoteConv" + withMigrationLock LockShared (Right uid) $ do + isUserInPostgres uid >>= \case + False -> fmap (not . 
null) $ runEmbedded (runClient client) $ embed $ lookupLocalMemberRemoteConv uid rcnv + True -> interpretConversationStoreToPostgres $ ConvStore.checkLocalMemberRemoteConv uid rcnv + SelectRemoteMembers uids rcnv -> do + logEffect "ConversationStore.SelectRemoteMembers" + withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do + filterUsersInPostgres uids >>= \pgUids -> do + (pgUsers, _) <- interpretConversationStoreToPostgres $ ConvStore.selectRemoteMembers pgUids rcnv + (cassUsers, _) <- runEmbedded (runClient client) $ embed $ filterRemoteConvMembers uids rcnv + let foundUsers = pgUsers <> cassUsers + pure (foundUsers, Set.fromList foundUsers == Set.fromList uids) + SetSelfMember qcid luid upd -> do + logEffect "ConversationStore.SetSelfMember" + let localConvFunctions lcid = + ( withMigrationLock LockShared (Left (tUnqualified lcid)), + isConvInPostgres (tUnqualified lcid) + ) + remoteConvFunctions _ = + ( withMigrationLock (LockShared) (Right (tUnqualified luid)), + isUserInPostgres (tUnqualified luid) + ) + let (withLock, isInPG) = foldQualified luid localConvFunctions remoteConvFunctions qcid + withLock $ + isInPG >>= \case + False -> runEmbedded (runClient client) $ embed $ updateSelfMember qcid luid upd + True -> interpretConversationStoreToPostgres $ ConvStore.setSelfMember qcid luid upd + SetOtherMember lcid quid upd -> do + logEffect "ConversationStore.SetOtherMember" + withMigrationLock LockShared (Left $ tUnqualified lcid) $ + isConvInPostgres (tUnqualified lcid) >>= \case + False -> runEmbedded (runClient client) $ embed $ updateOtherMemberLocalConv lcid quid upd + True -> interpretConversationStoreToPostgres (ConvStore.setOtherMember lcid quid upd) + DeleteMembers cid ul -> do + logEffect "ConversationStore.DeleteMembers" + withMigrationLock LockShared (Left cid) $ do + -- No need to check where these are, we just delete them from both places + runEmbedded (runClient client) $ embed $ removeMembersFromLocalConv cid ul + interpretConversationStoreToPostgres $ ConvStore.deleteMembers cid ul + DeleteMembersInRemoteConversation rcnv uids -> do + logEffect "ConversationStore.DeleteMembersInRemoteConversation" + withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do + -- No need to check where these are, we just delete them from both places + runEmbedded (runClient client) $ embed $ removeLocalMembersFromRemoteConv rcnv uids + interpretConversationStoreToPostgres $ ConvStore.deleteMembersInRemoteConversation rcnv uids + AddMLSClients groupId quid cs -> do + logEffect "ConversationStore.AddMLSClients" + cid <- groupIdToConvId groupId + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ addMLSClients groupId quid cs + True -> interpretConversationStoreToPostgres (ConvStore.addMLSClients groupId quid cs) + PlanClientRemoval gid clients -> do + logEffect "ConversationStore.PlanClientRemoval" + cid <- groupIdToConvId gid + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ planMLSClientRemoval gid clients + True -> interpretConversationStoreToPostgres (ConvStore.planClientRemoval gid clients) + RemoveMLSClients gid quid cs -> do + logEffect "ConversationStore.RemoveMLSClients" + cid <- groupIdToConvId gid + withMigrationLock LockShared (Left cid) $ do + runEmbedded (runClient client) $ embed $ removeMLSClients gid quid cs + interpretConversationStoreToPostgres (ConvStore.removeMLSClients gid quid cs) + 
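+  -- As with DeleteMembers above, removals are issued against both stores
+  -- without checking where the data lives: deleting rows that are absent is
+  -- a no-op, so writing to both is always safe.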
RemoveAllMLSClients gid -> do + logEffect "ConversationStore.RemoveAllMLSClients" + cid <- groupIdToConvId gid + withMigrationLock LockShared (Left cid) $ do + runEmbedded (runClient client) $ embed $ removeAllMLSClients gid + interpretConversationStoreToPostgres (ConvStore.removeAllMLSClients gid) + LookupMLSClients gid -> do + logEffect "ConversationStore.LookupMLSClients" + cid <- groupIdToConvId gid + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ lookupMLSClients gid + True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClients gid) + LookupMLSClientLeafIndices gid -> do + logEffect "ConversationStore.LookupMLSClientLeafIndices" + cid <- groupIdToConvId gid + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ lookupMLSClientLeafIndices gid + True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClientLeafIndices gid) + UpsertSubConversation convId subConvId groupId -> do + logEffect "ConversationStore.CreateSubConversation" + withMigrationLock LockShared (Left convId) $ + isConvInPostgres convId >>= \case + False -> runEmbedded (runClient client) $ embed $ insertSubConversation convId subConvId groupId + True -> interpretConversationStoreToPostgres (ConvStore.upsertSubConversation convId subConvId groupId) + GetSubConversation convId subConvId -> do + logEffect "ConversationStore.GetSubConversation" + withMigrationLock LockShared (Left convId) $ + isConvInPostgres convId >>= \case + False -> runEmbedded (runClient client) $ embed $ selectSubConversation convId subConvId + True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversation convId subConvId + GetSubConversationGroupInfo convId subConvId -> do + logEffect "ConversationStore.GetSubConversationGroupInfo" + withMigrationLock LockShared (Left convId) $ + isConvInPostgres convId >>= \case + False -> runEmbedded (runClient client) $ embed $ selectSubConvGroupInfo convId subConvId + True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationGroupInfo convId subConvId + GetSubConversationEpoch convId subConvId -> do + logEffect "ConversationStore.GetSubConversationEpoch" + withMigrationLock LockShared (Left convId) $ + isConvInPostgres convId >>= \case + False -> runEmbedded (runClient client) $ embed $ selectSubConvEpoch convId subConvId + True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationEpoch convId subConvId + SetSubConversationGroupInfo convId subConvId mPgs -> do + logEffect "ConversationStore.SetSubConversationGroupInfo" + withMigrationLock LockShared (Left convId) $ + isConvInPostgres convId >>= \case + False -> runEmbedded (runClient client) $ embed $ updateSubConvGroupInfo convId subConvId mPgs + True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationGroupInfo convId subConvId mPgs + SetSubConversationEpoch cid sconv epoch -> do + logEffect "ConversationStore.SetSubConversationEpoch" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient client) $ embed $ setEpochForSubConversation cid sconv epoch + True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationEpoch cid sconv epoch + SetSubConversationCipherSuite cid sconv cs -> do + logEffect "ConversationStore.SetSubConversationCipherSuite" + withMigrationLock LockShared (Left cid) $ + isConvInPostgres cid >>= \case + False -> runEmbedded (runClient 
client) $ embed $ setCipherSuiteForSubConversation cid sconv cs
+        True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationCipherSuite cid sconv cs
+  ListSubConversations cid -> do
+    logEffect "ConversationStore.ListSubConversations"
+    withMigrationLock LockShared (Left cid) $
+      isConvInPostgres cid >>= \case
+        False -> runEmbedded (runClient client) $ embed $ listSubConversations cid
+        True -> interpretConversationStoreToPostgres $ ConvStore.listSubConversations cid
+  DeleteSubConversation convId subConvId -> do
+    logEffect "ConversationStore.DeleteSubConversation"
+    withMigrationLock LockShared (Left convId) $
+      isConvInPostgres convId >>= \case
+        False -> runEmbedded (runClient client) $ embed $ deleteSubConversation convId subConvId
+        True -> interpretConversationStoreToPostgres $ ConvStore.deleteSubConversation convId subConvId
+  HaveRemoteConvs uids -> do
+    logEffect "ConversationStore.HaveRemoteConvs"
+    withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do
+      remotesInCass <- runEmbedded (runClient client) $ embed $ haveRemoteConvs uids
+      remotesInPG <- interpretConversationStoreToPostgres $ ConvStore.haveRemoteConvs uids
+      pure $ List.nubOrd (remotesInPG <> remotesInCass)
+
+getConvWithPostgres :: (PGConstraints r) => ConvId -> Sem r (Maybe StoredConversation)
+getConvWithPostgres cid = interpretConversationStoreToPostgres $ ConvStore.getConversation cid
+
+isConvInPostgres :: (PGConstraints r) => ConvId -> Sem r Bool
+isConvInPostgres cid = interpretConversationStoreToPostgres $ ConvStore.isConversationAlive cid
+
+-- | Here a user being in Postgres means that their remote conv memberships have
+-- been migrated to Postgres.
+isUserInPostgres :: (PGConstraints r) => UserId -> Sem r Bool
+isUserInPostgres uid = do
+  filterUsersInPostgres [uid] >>= \case
+    [] -> pure False
+    _ -> pure True
+
+filterUsersInPostgres :: (PGConstraints r) => [UserId] -> Sem r [UserId]
+filterUsersInPostgres uids = do
+  interpretConversationStoreToPostgres (ConvStore.haveRemoteConvs uids)
+
+-- | Assumes that the GroupId is local
+groupIdToConvId :: (Member (Error MigrationError) r) => GroupId -> Sem r ConvId
+groupIdToConvId gid =
+  case groupIdToConv gid of
+    Left _ -> throw InvalidGroupId
+    Right (_, gidParts) -> pure gidParts.qConvId.qUnqualified.conv
+
+data MigrationError = InvalidGroupId
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
index d1a07f6a61..e1a383ebe5 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
@@ -3,7 +3,6 @@ module Wire.ConversationStore.Migration where
 
 import Cassandra (ClientState)
-import Data.Bits
 import Data.Domain
 import Data.Id
 import Data.IntMap qualified as IntMap
@@ -11,11 +10,8 @@ import Data.Map qualified as Map
 import Data.Qualified
 import Data.Time
 import Data.Tuple.Extra
-import Data.UUID qualified as UUID
 import Data.Vector (Vector)
 import Data.Vector qualified as Vector
-import Hasql.Pool qualified as Hasql
-import Hasql.Session qualified as Session
 import Hasql.Statement qualified as Hasql
 import Hasql.TH
 import Hasql.Transaction qualified as Transaction
@@ -23,8 +19,10 @@ import Hasql.Transaction.Sessions
 import Imports
 import Polysemy
 import Polysemy.Async
+import Polysemy.Conc
 import Polysemy.Error
 import Polysemy.Input
+import Polysemy.Time
 import Polysemy.TinyLog
 import Wire.API.Conversation hiding (Member)
 import Wire.API.Conversation.Protocol
@@
-47,9 +45,9 @@ import Wire.StoredConversation migrateAllConversations :: Sem r () migrateAllConversations = undefined -migrateConversation :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r) => ConvId -> Sem r () +migrateConversation :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => ConvId -> Sem r () migrateConversation cid = do - void . withMigrationLock LockExclusive (Left cid) $ do + void . withMigrationLocks LockExclusive (Seconds 10) [Left cid] $ do mConvData <- getConvFromCassandra cid for_ mConvData $ \convData -> do saveConvToPostgres convData diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs index 0f05b0ee40..efa03dfe34 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs @@ -3,6 +3,7 @@ module Wire.ConversationStore.MigrationLock where import Data.Bits import Data.Id import Data.UUID qualified as UUID +import Data.Vector (Vector) import Hasql.Pool qualified as Hasql import Hasql.Session qualified as Session import Hasql.Statement qualified as Hasql @@ -10,10 +11,14 @@ import Hasql.TH import Imports import Polysemy import Polysemy.Async +import Polysemy.Conc.Effect.Race +import Polysemy.Error import Polysemy.Input +import Polysemy.Time.Data.TimeUnit import Polysemy.TinyLog (TinyLog) import Polysemy.TinyLog qualified as TinyLog import System.Logger.Message qualified as Log +import Wire.API.PostgresMarshall import Wire.ConversationStore.Postgres data LockType @@ -23,49 +28,67 @@ data LockType LockShared withMigrationLock :: + (PGConstraints r, Member Async r, Member TinyLog r, Member Race r, Member (Error MigrationLockError) r) => + LockType -> + Either ConvId UserId -> + Sem r a -> + Sem r a +withMigrationLock ty key = withMigrationLocks ty (MilliSeconds 500) [key] + +data MigrationLockError = TimedOutAcquiringLock + +withMigrationLocks :: ( PGConstraints r, Member Async r, - Member TinyLog r + Member TinyLog r, + Member Race r, + Member (Error MigrationLockError) r, + TimeUnit u ) => LockType -> - Either ConvId UserId -> + u -> + [Either ConvId UserId] -> Sem r a -> Sem r a -withMigrationLock lockType convOrUser action = do +withMigrationLocks lockType maxWait convOrUsers action = do lockAcquired <- embed newEmptyMVar actionCompleted <- embed newEmptyMVar pool <- input lockThread <- async . embed . Hasql.use pool $ do - _ <- Session.statement lockId acquireLock + let lockIds = map mkLockId convOrUsers + Session.statement lockIds acquireLocks + liftIO $ putMVar lockAcquired () liftIO $ takeMVar actionCompleted - Session.statement lockId releaseLock - -- TODO: We should time this out and log in case the lock is taken by another - -- process which gets stuck - embed $ takeMVar lockAcquired + Session.statement lockIds releaseLocks + + void . timeout (cancel lockThread >> throw TimedOutAcquiringLock) maxWait $ embed (takeMVar lockAcquired) res <- action embed $ putMVar actionCompleted () - -- TODO: Do we need a timeout here? 
- mEithErr <- await lockThread + mEithErr <- timeout (cancel lockThread) (Seconds 1) $ await lockThread + let logFirstLock = + case convOrUsers of + [] -> id + (convOrUser : _) -> Log.field (either (const "first_conv") (const "first_user") convOrUser) (either idToText idToText convOrUser) + logError errorStr = + TinyLog.warn $ + Log.msg (Log.val "Failed to cleanly unlock the migration locks") + . logFirstLock + . Log.field "numberOfLocks" (length convOrUsers) + . Log.field "error" errorStr case mEithErr of - Just (Right ()) -> pure () - Just (Left e) -> - TinyLog.warn $ - Log.msg (Log.val "Failed to cleanly unlock the migration lock") - . Log.field (either (const "conv") (const "user") convOrUser) (either idToText idToText convOrUser) - . Log.field "error" (show e) - Nothing -> - TinyLog.warn $ - Log.msg (Log.val "Failed to cleanly unlock the migration lock") - . Log.field (either (const "conv") (const "user") convOrUser) (either idToText idToText convOrUser) - . Log.field "error" ("N/A" :: ByteString) + Left () -> logError "timed out waiting for unlock" + Right (Nothing) -> logError "lock/unlock thread didn't finish" + Right (Just (Left e)) -> logError (show e) + Right (Just (Right ())) -> pure () + pure res where - lockId :: Int64 - lockId = fromIntegral $ case convOrUser of + mkLockId :: Either ConvId UserId -> Int64 + mkLockId convOrUser = fromIntegral $ case convOrUser of Left convId -> hashUUID convId Right userId -> hashUUID userId @@ -75,14 +98,28 @@ withMigrationLock lockType convOrUser action = do mixed = w1 `xor` (w2 `shiftR` 32) `xor` (w2 `shiftL` 32) in fromIntegral mixed - acquireLock :: Hasql.Statement (Int64) () - acquireLock = - case lockType of - LockExclusive -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_lock($1 :: bigint))|] - LockShared -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_lock_shared($1 :: bigint))|] + acquireLocks :: Hasql.Statement [Int64] () + acquireLocks = + lmapPG @[_] @(Vector _) + case lockType of + LockExclusive -> + [resultlessStatement|SELECT (1 :: int) + FROM (SELECT pg_advisory_lock(lockId) + FROM (SELECT UNNEST($1 :: bigint[]) as lockId))|] + LockShared -> + [resultlessStatement|SELECT (1 :: int) + FROM (SELECT pg_advisory_lock_shared(lockId) + FROM (SELECT UNNEST($1 :: bigint[]) as lockId))|] - releaseLock :: Hasql.Statement (Int64) () - releaseLock = - case lockType of - LockExclusive -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_unlock($1 :: bigint))|] - LockShared -> [resultlessStatement|SELECT (1 :: int) FROM (SELECT pg_advisory_unlock_shared($1 :: bigint))|] + releaseLocks :: Hasql.Statement [Int64] () + releaseLocks = + lmapPG @[_] @(Vector _) + case lockType of + LockExclusive -> + [resultlessStatement|SELECT (1 :: int) + FROM (SELECT pg_advisory_unlock(lockId) + FROM (SELECT UNNEST($1 :: bigint[]) as lockId))|] + LockShared -> + [resultlessStatement|SELECT (1 :: int) + FROM (SELECT pg_advisory_unlock_shared(lockId) + FROM (SELECT UNNEST($1 :: bigint[]) as lockId))|] diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs index 574ea0241c..9e28803b64 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs @@ -116,6 +116,7 @@ interpretConversationStoreToPostgres = interpret $ \case ListSubConversations cid -> listSubConversationsImpl cid DeleteSubConversation convId subConvId -> deleteSubConversationImpl convId 
subConvId
  SearchConversations search -> searchConversationsImpl search
+  HaveRemoteConvs uids -> haveRemoteConvsImpl uids
 
 upsertConversationImpl :: (PGConstraints r) => Local ConvId -> NewConversation -> Sem r StoredConversation
 upsertConversationImpl lcnv nc = do
@@ -870,6 +871,18 @@ checkLocalMemberRemoteConvImpl uid (tUntagged -> Qualified convId domain) =
       ) :: boolean
     |]
 
+haveRemoteConvsImpl :: (PGConstraints r) => [UserId] -> Sem r [UserId]
+haveRemoteConvsImpl uids =
+  runStatement uids select
+  where
+    select :: Hasql.Statement [UserId] [UserId]
+    select =
+      dimapPG @[_] @(Vector _) @(Vector _) @[_]
+        [vectorStatement|SELECT DISTINCT "user" :: uuid
+           FROM remote_conversation_local_member
+           WHERE "user" = ANY ($1 :: uuid[])
+          |]
+
 selectRemoteMembersImpl :: (PGConstraints r) => [UserId] -> Remote ConvId -> Sem r ([UserId], Bool)
 selectRemoteMembersImpl uids (tUntagged -> Qualified cid domain) = do
   foundUids <- runStatement (domain, cid, uids) select
diff --git a/libs/wire-subsystems/wire-subsystems.cabal b/libs/wire-subsystems/wire-subsystems.cabal
index 42ec6bf8e3..45f6c041da 100644
--- a/libs/wire-subsystems/wire-subsystems.cabal
+++ b/libs/wire-subsystems/wire-subsystems.cabal
@@ -122,6 +122,7 @@ common common-all
     , network
     , network-conduit-tls
     , polysemy
+    , polysemy-conc
     , polysemy-plugin
     , polysemy-time
     , polysemy-wire-zoo

From 1a736f93260eaf018247d352ac54cb9cc7ea9bb5 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 14 Oct 2025 14:10:54 +0200
Subject: [PATCH 03/49] ConversationStore.Cassandra: Fix edge case bug in
 getLocalConvIds

When fetching `maxIds + 1` convs, it can happen that a user has exactly that
many convs left; in this case the `hasMore` field of the page would be false,
but we'd be sending a truncated list of convs.
---
 libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index 11570aef6f..3d4349a808 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -272,11 +272,9 @@ localConversationIdsOf usr cids = do
 
 getLocalConvIds :: UserId -> Maybe ConvId -> Range 1 1000 Int32 -> Client (ResultSet ConvId)
 getLocalConvIds usr start (fromRange -> maxIds) = do
-  mkResultSet . strip . fmap runIdentity <$> case start of
+  mkResultSetByLength (fromIntegral maxIds) . fmap runIdentity . result <$> case start of
     Just c -> paginate Cql.selectUserConvsFrom (paramsP LocalQuorum (usr, c) (maxIds + 1))
     Nothing -> paginate Cql.selectUserConvs (paramsP LocalQuorum (Identity usr) (maxIds + 1))
-  where
-    strip p = p {result = take (fromIntegral maxIds) (result p)}
 
 getConvIds :: Local UserId -> Range 1 1000 Int32 -> Maybe ConversationPagingState -> Client ConvIdsPage
 getConvIds lusr (fromRange -> maxIds) pagingState = do

From 20a422859565e9ec3eb9444ea2d810706e2806d9 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 14 Oct 2025 15:49:19 +0200
Subject: [PATCH 04/49] ConversationStore: Make GetConversationIds work during
 migration

This is done by making the pagingState encode the last conversation Id
served. The store effect has a new action to list only remote conv ids. The
`GetConversationIds` action has been removed and implemented generally using
`GetLocalConversationIds` and `GetRemoteConversationIds`.
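A caller can then page through all conversation ids with a single
qualified-id cursor. A minimal sketch of the intended usage (walkAllConvIds
is a hypothetical helper, not part of this patch; it relies only on
getConversationIdsResultSet and lastMay as introduced below, and assumes
toRange/Proxy from Data.Range):

    walkAllConvIds :: (Member ConversationStore r) => Local UserId -> Sem r [Qualified ConvId]
    walkAllConvIds lusr = go Nothing
      where
        go cursor = do
          -- fetch one page of ids, starting after the last id already seen
          rs <- getConversationIdsResultSet lusr (toRange (Proxy @1000)) cursor
          case rs.resultSetType of
            ResultSetComplete -> pure rs.resultSetResult
            ResultSetTruncated ->
              (rs.resultSetResult <>) <$> go (lastMay rs.resultSetResult)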
---
 .../src/Wire/ConversationStore.hs             |  56 ++++++++-
 .../src/Wire/ConversationStore/Cassandra.hs   |  71 +++--------
 .../ConversationStore/Cassandra/Queries.hs    |   5 +-
 .../src/Wire/ConversationStore/Postgres.hs    | 116 ++----------------
 4 files changed, 86 insertions(+), 162 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore.hs b/libs/wire-subsystems/src/Wire/ConversationStore.hs
index 2eb73088b7..c88a3439b7 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore.hs
@@ -19,6 +19,9 @@
 module Wire.ConversationStore where

+import Control.Error (lastMay)
+import Data.Aeson qualified as Aeson
+import Data.ByteString qualified as BS
 import Data.Id
 import Data.Misc
 import Data.Qualified
@@ -38,6 +41,7 @@ import Wire.API.MLS.LeafNode
 import Wire.API.MLS.SubConversation
 import Wire.API.Pagination
 import Wire.API.Provider.Service
+import Wire.API.Routes.MultiTablePaging
 import Wire.ConversationStore.MLS.Types
 import Wire.Sem.Paging.Cassandra
 import Wire.StoredConversation
@@ -72,7 +76,7 @@ data ConversationStore m a where
   GetConversationEpoch :: ConvId -> ConversationStore m (Maybe Epoch)
   GetConversations :: [ConvId] -> ConversationStore m [StoredConversation]
   GetLocalConversationIds :: UserId -> Maybe ConvId -> Range 1 1000 Int32 -> ConversationStore m (ResultSet ConvId)
-  GetConversationIds :: Local UserId -> Range 1 1000 Int32 -> Maybe ConversationPagingState -> ConversationStore m ConvIdsPage
+  GetRemoteConverastionIds :: UserId -> Maybe (Remote ConvId) -> Range 1 1000 Int32 -> ConversationStore m (ResultSet (Remote ConvId))
   GetConversationMetadata :: ConvId -> ConversationStore m (Maybe ConversationMetadata)
   GetGroupInfo :: ConvId -> ConversationStore m (Maybe GroupInfoData)
   -- FUTUREWORK: This is only relevant for Convs in Cassandra, we can delete it
@@ -145,3 +149,53 @@ acceptConnectConversation cid = setConversationType cid One2OneConv

 -- | Add a member to a local conversation, as an admin.
 upsertMember :: (Member ConversationStore r) => Local ConvId -> Local UserId -> Sem r [LocalMember]
 upsertMember c u = fst <$> upsertMembers (tUnqualified c) (UserList [(tUnqualified u, roleNameWireAdmin)] [])
+
+getConversationIdsResultSet :: forall r.
(Member ConversationStore r) => Local UserId -> Range 1 1000 Int32 -> Maybe (Qualified ConvId) -> Sem r (ResultSet (Qualified ConvId)) +getConversationIdsResultSet lusr maxIds mLastId = do + case fmap (flip relativeTo lusr) mLastId of + Nothing -> getLocals Nothing + Just (Local (tUnqualified -> lastId)) -> getLocals (Just lastId) + Just (Remote lastId) -> getRemotes (Just lastId) maxIds + where + localDomain = tDomain lusr + usr = tUnqualified lusr + + getLocals :: Maybe ConvId -> Sem r (ResultSet (Qualified ConvId)) + getLocals lastId = do + localPage <- flip Qualified localDomain <$$> getLocalConversationIds usr lastId maxIds + let remainingSize = fromRange maxIds - fromIntegral (length localPage.resultSetResult) + case checked remainingSize of + Nothing -> pure localPage {resultSetType = ResultSetTruncated} + Just checkedRemaining -> do + remotePage <- getRemotes Nothing checkedRemaining + pure + remotePage + { resultSetResult = localPage.resultSetResult <> remotePage.resultSetResult + } + + getRemotes :: Maybe (Remote ConvId) -> Range 1 1000 Int32 -> Sem r (ResultSet (Qualified ConvId)) + getRemotes lastRemote maxRemotes = tUntagged <$$> getRemoteConverastionIds usr lastRemote maxRemotes + +-- | This function only exists because we use the 'MultiTablePage' type for the +-- endpoint. Since now the pagination is based on the qualified ids, we can +-- remove the use of this type in future API versions. +getConversationIds :: forall r. (Member ConversationStore r) => Local UserId -> Range 1 1000 Int32 -> Maybe ConversationPagingState -> Sem r ConvIdsPage +getConversationIds lusr maxIds pagingState = do + let mLastId = Aeson.decode . BS.fromStrict =<< (.mtpsState) =<< pagingState + resultSet <- getConversationIdsResultSet lusr maxIds mLastId + let mLastResult = lastMay resultSet.resultSetResult + pure + MultiTablePage + { mtpResults = resultSet.resultSetResult, + mtpHasMore = case resultSet.resultSetType of + ResultSetTruncated -> True + ResultSetComplete -> False, + mtpPagingState = + MultiTablePagingState + { mtpsTable = case fmap (flip relativeTo lusr) mLastResult of + Just (Local _) -> PagingLocals + Just (Remote _) -> PagingRemotes + Nothing -> PagingRemotes, + mtpsState = BS.toStrict . 
Aeson.encode <$> mLastResult + } + } diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index 3d4349a808..c6da3447e6 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -31,7 +31,6 @@ import Control.Arrow import Control.Error.Util hiding (hoistMaybe) import Control.Lens import Control.Monad.Trans.Maybe -import Data.ByteString qualified as BS import Data.ByteString.Conversion import Data.Domain import Data.Id @@ -64,7 +63,6 @@ import Wire.API.MLS.GroupInfo import Wire.API.MLS.LeafNode (LeafIndex) import Wire.API.MLS.SubConversation import Wire.API.Provider.Service -import Wire.API.Routes.MultiTablePaging import Wire.ConversationStore (ConversationStore (..), LockAcquired (..), MLSCommitLockStore (..)) import Wire.ConversationStore qualified as ConvStore import Wire.ConversationStore.Cassandra.Instances () @@ -276,54 +274,11 @@ getLocalConvIds usr start (fromRange -> maxIds) = do Just c -> paginate Cql.selectUserConvsFrom (paramsP LocalQuorum (usr, c) (maxIds + 1)) Nothing -> paginate Cql.selectUserConvs (paramsP LocalQuorum (Identity usr) (maxIds + 1)) -getConvIds :: Local UserId -> Range 1 1000 Int32 -> Maybe ConversationPagingState -> Client ConvIdsPage -getConvIds lusr (fromRange -> maxIds) pagingState = do - let pagingTable = maybe PagingLocals (.mtpsTable) pagingState - cassPagingState = (PagingState . BS.fromStrict <$> (mtpsState =<< pagingState)) - - case pagingTable of - PagingLocals -> do - localPage <- getLocals cassPagingState - let remainingSize = maxIds - fromIntegral (length (localPage.mtpResults)) - if localPage.mtpHasMore || remainingSize <= 0 - then pure $ localPage {mtpHasMore = True} - else do - remotePage <- getRemotes remainingSize Nothing - pure $ - remotePage {mtpResults = localPage.mtpResults <> remotePage.mtpResults} - PagingRemotes -> - getRemotes maxIds cassPagingState - where - getLocals :: Maybe PagingState -> Client ConvIdsPage - getLocals cassPagingState = do - page <- - fmap (runIdentity) - <$> paginateWithState Cql.selectUserConvs (paramsPagingState LocalQuorum (Identity (tUnqualified lusr)) maxIds cassPagingState) - pure $ - MultiTablePage - { mtpResults = map (\cid -> Qualified cid (tDomain lusr)) page.pwsResults, - mtpHasMore = isJust page.pwsState, - mtpPagingState = - maybe - (MultiTablePagingState PagingRemotes Nothing) - (MultiTablePagingState PagingLocals . Just . BS.toStrict . unPagingState) - page.pwsState - } - getRemotes :: Int32 -> Maybe PagingState -> Client ConvIdsPage - getRemotes maxRemotes cassPagingState = do - page <- - uncurry (flip Qualified) - <$$> paginateWithState Cql.selectUserRemoteConvs (paramsPagingState LocalQuorum (Identity (tUnqualified lusr)) maxRemotes cassPagingState) - pure $ - MultiTablePage - { mtpResults = page.pwsResults, - mtpHasMore = isJust page.pwsState, - mtpPagingState = - maybe - (MultiTablePagingState PagingRemotes Nothing) - (MultiTablePagingState PagingRemotes . Just . BS.toStrict . unPagingState) - page.pwsState - } +getRemoteConvIds :: UserId -> Maybe (Remote ConvId) -> Range 1 1000 Int32 -> Client (ResultSet (Remote ConvId)) +getRemoteConvIds usr start (fromRange -> maxIds) = do + mkResultSetByLength (fromIntegral maxIds) . fmap (uncurry toRemoteUnsafe) . 
result <$> case start of
+    Just (tUntagged -> Qualified c dom) -> paginate Cql.selectUserRemoteConvsFrom (paramsP LocalQuorum (usr, dom, c) (maxIds + 1))
+    Nothing -> paginate Cql.selectUserRemoteConvs (paramsP LocalQuorum (Identity usr) (maxIds + 1))

 -- | Takes a list of remote conversation ids and fetches member status flags
 -- for the given user
@@ -921,9 +876,9 @@ interpretConversationStoreToCassandra client = interpret $ \case
   GetLocalConversationIds uid start maxIds -> do
     logEffect "ConversationStore.GetLocalConversationIds"
     embedClient client $ getLocalConvIds uid start maxIds
-  GetConversationIds uid maxIds pagingState -> do
-    logEffect "ConversationStore.GetConversationIds"
-    embedClient client $ getConvIds uid maxIds pagingState
+  GetRemoteConverastionIds uid start maxIds -> do
+    logEffect "ConversationStore.GetRemoteConverastionIds"
+    embedClient client $ getRemoteConvIds uid start maxIds
   GetConversationMetadata cid -> do
     logEffect "ConversationStore.GetConversationMetadata"
     embedClient client $ conversationMeta cid
@@ -1143,10 +1098,12 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
             then ResultSetTruncated
             else ResultSetComplete
         }
-  GetConversationIds uid maxIds pagingState -> do
-    logEffect "ConversationStore.GetConversationIds"
-    -- TODO: Deal with paginating across DBs
-    embedClient client $ getConvIds uid maxIds pagingState
+  GetRemoteConverastionIds uid start maxIds -> do
+    logEffect "ConversationStore.GetRemoteConverastionIds"
+    withMigrationLock LockShared (Right uid) $ do
+      isUserInPostgres uid >>= \case
+        False -> embedClient client $ getRemoteConvIds uid start maxIds
+        True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConverastionIds uid start maxIds
   GetConversationMetadata cid -> do
     logEffect "ConversationStore.GetConversationMetadata"
     withMigrationLock LockShared (Left cid) $
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs
index 85079571ce..c75bfb4d06 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs
@@ -243,7 +243,10 @@ updateMemberConvRoleName = {- `IF EXISTS`, but that requires benchmarking -} "up

 -- local conversation with remote members
 selectUserRemoteConvs :: PrepQuery R (Identity UserId) (Domain, ConvId)
-selectUserRemoteConvs = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ?"
+selectUserRemoteConvs = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? order by conv_remote_domain, conv_remote_id"
+
+selectUserRemoteConvsFrom :: PrepQuery R (UserId, Domain, ConvId) (Domain, ConvId)
+selectUserRemoteConvsFrom = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? and (conv_remote_domain, conv_remote_id) > (?, ?)
order by conv_remote_domain, conv_remote_id"

 insertRemoteMember :: PrepQuery W (ConvId, Domain, UserId, RoleName) ()
 insertRemoteMember = "insert into member_remote_user (conv, user_remote_domain, user_remote_id, conversation_role) values (?, ?, ?, ?)"
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
index 9e28803b64..3570fe73ca 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
@@ -2,10 +2,7 @@

 module Wire.ConversationStore.Postgres (interpretConversationStoreToPostgres) where

-import Control.Error (lastMay)
 import Control.Monad.Trans.Maybe
-import Data.Aeson qualified as Aeson
-import Data.ByteString qualified as BS
 import Data.Domain
 import Data.Id
 import Data.Map qualified as Map
@@ -42,7 +39,6 @@ import Wire.API.MLS.SubConversation
 import Wire.API.Pagination
 import Wire.API.PostgresMarshall
 import Wire.API.Provider.Service
-import Wire.API.Routes.MultiTablePaging
 import Wire.ConversationStore
 import Wire.ConversationStore.MLS.Types
 import Wire.Postgres
@@ -63,7 +59,7 @@ interpretConversationStoreToPostgres = interpret $ \case
   GetConversationEpoch cid -> getConversationEpochImpl cid
   GetConversations cids -> getConversationsImpl cids
   GetLocalConversationIds uid lastConvId maxIds -> getLocalConversationIdsImpl uid lastConvId maxIds
-  GetConversationIds uid maxIds pagingState -> getConversationIdsImpl uid maxIds pagingState
+  GetRemoteConverastionIds uid lastConvId maxIds -> getRemoteConversationIdsImpl uid lastConvId maxIds
   GetConversationMetadata cid -> getConversationMetadataImpl cid
   GetGroupInfo cid -> getGroupInfoImpl cid
   IsConversationAlive cid -> isConversationAliveImpl cid
@@ -305,101 +301,24 @@ getLocalConversationIdsImpl usr start (fromRange -> maxIds) = do
             LIMIT ($3 :: integer)
           |]

-getConversationIdsImpl :: forall r. (PGConstraints r) => Local UserId -> Range 1 1000 Int32 -> Maybe ConversationPagingState -> Sem r ConvIdsPage
-getConversationIdsImpl lusr (fromRange -> maxIds) pagingState = do
-  let pagingTable = maybe PagingLocals (.mtpsTable) pagingState
-      mLastId = Aeson.decode . BS.fromStrict =<< (.mtpsState) =<< pagingState
-  case pagingTable of
-    PagingLocals -> do
-      localPage <- getLocals maxIds mLastId
-      let remainingSize = maxIds - fromIntegral (length localPage.mtpResults)
-      if remainingSize <= 0
-        then pure localPage {mtpHasMore = True}
-        else do
-          remotePage <- getRemotes remainingSize Nothing
-          pure $
-            remotePage {mtpResults = localPage.mtpResults <> remotePage.mtpResults}
-    PagingRemotes ->
-      getRemotes maxIds mLastId
+getRemoteConversationIdsImpl :: (PGConstraints r) => UserId -> Maybe (Remote ConvId) -> Range 1 1000 Int32 -> Sem r (ResultSet (Remote ConvId))
+getRemoteConversationIdsImpl usr start (fromRange -> maxIds) = do
+  mkResultSetByLength (fromIntegral maxIds) .
map (uncurry toRemoteUnsafe) <$> case start of + Just (tUntagged -> Qualified c dom) -> runStatement (usr, dom, c, maxIds + 1) selectFrom + Nothing -> runStatement (usr, maxIds + 1) selectStart where - localDomain = tDomain lusr - usr = tUnqualified lusr - - getLocals :: Int32 -> Maybe (Qualified ConvId) -> Sem r ConvIdsPage - getLocals maxLocals mLastId = do - mkLocalsPage <$> case mLastId of - Nothing -> runStatement (usr, maxLocals) selectLocalsStart - Just (Qualified lastId _) -> runStatement (usr, lastId, maxIds) selectLocalsFrom - - getRemotes :: Int32 -> Maybe (Qualified ConvId) -> Sem r ConvIdsPage - getRemotes maxRemotes mLastId = do - mkRemotesPage maxRemotes <$> case mLastId of - Nothing -> runStatement (usr, maxRemotes) selectRemotesStart - Just (Qualified lastId lastDomain) -> runStatement (usr, lastDomain, lastId, maxRemotes) selectRemotesFrom - - mkLocalsPage :: [ConvId] -> ConvIdsPage - mkLocalsPage results = - MultiTablePage - { mtpResults = map (\cid -> Qualified cid localDomain) results, - mtpHasMore = length results >= fromIntegral maxIds, - mtpPagingState = - case lastMay results of - Nothing -> - MultiTablePagingState - { mtpsTable = PagingRemotes, - mtpsState = Nothing - } - Just newLastId -> - MultiTablePagingState - { mtpsTable = PagingLocals, - mtpsState = Just . BS.toStrict . Aeson.encode $ Qualified newLastId localDomain - } - } - - mkRemotesPage :: Int32 -> [(Domain, ConvId)] -> ConvIdsPage - mkRemotesPage maxRemotes results = - MultiTablePage - { mtpResults = map (uncurry $ flip Qualified) results, - mtpHasMore = length results >= fromIntegral maxRemotes, - mtpPagingState = - case lastMay results of - Nothing -> - -- This might look absurd because when this state is back here, - -- we'll go to the first page, but 'mtpHasMore' should be set to - -- false when we have empty results. - MultiTablePagingState - { mtpsTable = PagingRemotes, - mtpsState = Nothing - } - Just (newLastDomain, newLastId) -> - MultiTablePagingState - { mtpsTable = PagingRemotes, - mtpsState = Just . 
BS.toStrict $ Aeson.encode $ Qualified newLastId newLastDomain - } - } - - selectLocalsFrom :: Hasql.Statement (UserId, ConvId, Int32) [ConvId] - selectLocalsFrom = - dimapPG - [vectorStatement|SELECT (conv :: uuid) - FROM conversation_member - WHERE "user" = ($1 :: uuid) - AND conv > ($2 :: uuid) - ORDER BY conv - LIMIT ($3 :: integer) - |] - selectLocalsStart :: Hasql.Statement (UserId, Int32) [(ConvId)] - selectLocalsStart = + selectStart :: Hasql.Statement (UserId, Int32) [(Domain, ConvId)] + selectStart = dimapPG - [vectorStatement|SELECT (conv :: uuid) - FROM conversation_member + [vectorStatement|SELECT (conv_remote_domain :: text), (conv_remote_id :: uuid) + FROM remote_conversation_local_member WHERE "user" = ($1 :: uuid) - ORDER BY conv + ORDER BY (conv_remote_domain, conv_remote_id) LIMIT ($2 :: integer) |] - selectRemotesFrom :: Hasql.Statement (UserId, Domain, ConvId, Int32) [(Domain, ConvId)] - selectRemotesFrom = + selectFrom :: Hasql.Statement (UserId, Domain, ConvId, Int32) [(Domain, ConvId)] + selectFrom = dimapPG [vectorStatement|SELECT (conv_remote_domain :: text), (conv_remote_id :: uuid) FROM remote_conversation_local_member @@ -408,15 +327,6 @@ getConversationIdsImpl lusr (fromRange -> maxIds) pagingState = do ORDER BY (conv_remote_domain, conv_remote_id) LIMIT ($4 :: integer) |] - selectRemotesStart :: Hasql.Statement (UserId, Int32) [(Domain, ConvId)] - selectRemotesStart = - dimapPG - [vectorStatement|SELECT (conv_remote_domain :: text), (conv_remote_id :: uuid) - FROM remote_conversation_local_member - WHERE "user" = ($1 :: uuid) - ORDER BY (conv_remote_domain, conv_remote_id) - LIMIT ($2 :: integer) - |] getConversationMetadataImpl :: (PGConstraints r) => ConvId -> Sem r (Maybe ConversationMetadata) getConversationMetadataImpl cid = From 8372a09d536ad5198619bd930352eb5d5d203d6c Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 14 Oct 2025 16:16:45 +0200 Subject: [PATCH 05/49] ConversationStore.Cassandra: Save users joining their first remote conv in postgres This is consistent with creating new conversations in postgres. 
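This way, when the migration is complete, already-running galley instances
won't create more data in Cassandra.

To illustrate the routing rule with a toy helper (the name is illustrative,
not part of the patch): of the users being upserted, only those that
already have remote-conv rows in Cassandra keep being written there;
everyone else, including brand-new members, goes to Postgres.

    import Data.Id (UserId)

    -- cassUids: users that already have rows in Cassandra's
    -- user_remote_conv table; uids: the batch being upserted.
    splitRemoteConvUpserts :: [UserId] -> [UserId] -> ([UserId], [UserId])
    splitRemoteConvUpserts cassUids uids =
      ( filter (`elem` cassUids) uids, -- keep writing to Cassandra
        filter (`notElem` cassUids) uids -- upsert into Postgres
      )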
---
 .../src/Wire/ConversationStore/Cassandra.hs   | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index c6da3447e6..6255e05c18 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -1054,6 +1054,7 @@ interpretConversationStoreToCassandraAndPostgres ::
   Sem r a
 interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
   UpsertConversation lcnv nc -> do
+    -- Save new convs in postgresql
     withMigrationLock LockShared (Left $ tUnqualified lcnv) $
       embedClient client (getConversation (tUnqualified lcnv)) >>= \case
         Nothing -> interpretConversationStoreToPostgres $ ConvStore.upsertConversation lcnv nc
@@ -1250,10 +1251,16 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
     _ -> interpretConversationStoreToPostgres (ConvStore.upsertMembers cid ul)
   UpsertMembersInRemoteConversation rcid uids -> do
     logEffect "ConversationStore.CreateMembersInRemoteConversation"
+
+    -- Save users joining their first remote conv in postgres
     withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do
       filterUsersInPostgres uids >>= \pgUids -> do
-        interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid pgUids
-        let cassUids = filter (`notElem` pgUids) uids
+        let -- These are not in Postgres, but that doesn't mean they're in
+            -- Cassandra
+            nonPgUids = filter (`notElem` pgUids) uids
+        cassUids <- embedClient client $ haveRemoteConvs nonPgUids
+        let newPgUids = filter (`notElem` cassUids) uids
+        interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid newPgUids
         runEmbedded (runClient client) $ embed $ addLocalMembersToRemoteConv rcid cassUids

From b269aa7af85f34e0803640ccef8eebe781dc85cd Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 14 Oct 2025 16:19:53 +0200
Subject: [PATCH 06/49] ConversationStore.Cassandra: Use `embedClient`

---
 .../src/Wire/ConversationStore/Cassandra.hs   | 114 +++++++++---------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index 6255e05c18..f53e550422 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -950,40 +950,40 @@ interpretConversationStoreToCassandra client = interpret $ \case
     embedClient client $ deleteTeamConversations tid
   UpsertMembers cid ul -> do
     logEffect "ConversationStore.CreateMembers"
-    runEmbedded (runClient client) $ embed $ addMembers cid ul
+    embedClient client $ addMembers cid ul
   UpsertMembersInRemoteConversation rcid uids -> do
     logEffect "ConversationStore.CreateMembersInRemoteConversation"
-    runEmbedded (runClient client) $ embed $ addLocalMembersToRemoteConv rcid uids
+    embedClient client $ addLocalMembersToRemoteConv rcid uids
   CreateBotMember sr bid cid -> do
     logEffect "ConversationStore.CreateBotMember"
-    runEmbedded (runClient client) $ embed $ addBotMember sr bid cid
+    embedClient client $ addBotMember sr bid cid
   GetLocalMember cid uid -> do
     logEffect "ConversationStore.GetLocalMember"
-    runEmbedded
(runClient client) $ embed $ member cid uid + embedClient client $ member cid uid GetLocalMembers cid -> do logEffect "ConversationStore.GetLocalMembers" - runEmbedded (runClient client) $ embed $ members cid + embedClient client $ members cid GetRemoteMember cid uid -> do logEffect "ConversationStore.GetRemoteMember" - runEmbedded (runClient client) $ embed $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid) + embedClient client $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid) GetRemoteMembers rcid -> do logEffect "ConversationStore.GetRemoteMembers" - runEmbedded (runClient client) $ embed $ lookupRemoteMembers rcid + embedClient client $ lookupRemoteMembers rcid CheckLocalMemberRemoteConv uid rcnv -> do logEffect "ConversationStore.CheckLocalMemberRemoteConv" - fmap (not . null) $ runEmbedded (runClient client) $ embed $ lookupLocalMemberRemoteConv uid rcnv + fmap (not . null) $ embedClient client $ lookupLocalMemberRemoteConv uid rcnv SelectRemoteMembers uids rcnv -> do logEffect "ConversationStore.SelectRemoteMembers" - runEmbedded (runClient client) $ embed $ filterRemoteConvMembers uids rcnv + embedClient client $ filterRemoteConvMembers uids rcnv SetSelfMember qcid luid upd -> do logEffect "ConversationStore.SetSelfMember" - runEmbedded (runClient client) $ embed $ updateSelfMember qcid luid upd + embedClient client $ updateSelfMember qcid luid upd SetOtherMember lcid quid upd -> do logEffect "ConversationStore.SetOtherMember" - runEmbedded (runClient client) $ embed $ updateOtherMemberLocalConv lcid quid upd + embedClient client $ updateOtherMemberLocalConv lcid quid upd DeleteMembers cnv ul -> do logEffect "ConversationStore.DeleteMembers" - runEmbedded (runClient client) $ embed $ removeMembersFromLocalConv cnv ul + embedClient client $ removeMembersFromLocalConv cnv ul DeleteMembersInRemoteConversation rcnv uids -> do logEffect "ConversationStore.DeleteMembersInRemoteConversation" runEmbedded (runClient client) $ @@ -991,54 +991,54 @@ interpretConversationStoreToCassandra client = interpret $ \case removeLocalMembersFromRemoteConv rcnv uids AddMLSClients lcnv quid cs -> do logEffect "ConversationStore.AddMLSClients" - runEmbedded (runClient client) $ embed $ addMLSClients lcnv quid cs + embedClient client $ addMLSClients lcnv quid cs PlanClientRemoval lcnv cids -> do logEffect "ConversationStore.PlanClientRemoval" - runEmbedded (runClient client) $ embed $ planMLSClientRemoval lcnv cids + embedClient client $ planMLSClientRemoval lcnv cids RemoveMLSClients lcnv quid cs -> do logEffect "ConversationStore.RemoveMLSClients" - runEmbedded (runClient client) $ embed $ removeMLSClients lcnv quid cs + embedClient client $ removeMLSClients lcnv quid cs RemoveAllMLSClients gid -> do logEffect "ConversationStore.RemoveAllMLSClients" - runEmbedded (runClient client) $ embed $ removeAllMLSClients gid + embedClient client $ removeAllMLSClients gid LookupMLSClients lcnv -> do logEffect "ConversationStore.LookupMLSClients" - runEmbedded (runClient client) $ embed $ lookupMLSClients lcnv + embedClient client $ lookupMLSClients lcnv LookupMLSClientLeafIndices lcnv -> do logEffect "ConversationStore.LookupMLSClientLeafIndices" - runEmbedded (runClient client) $ embed $ lookupMLSClientLeafIndices lcnv + embedClient client $ lookupMLSClientLeafIndices lcnv UpsertSubConversation convId subConvId groupId -> do logEffect "ConversationStore.CreateSubConversation" - runEmbedded (runClient client) $ embed $ insertSubConversation convId subConvId groupId + embedClient client $ 
insertSubConversation convId subConvId groupId GetSubConversation convId subConvId -> do logEffect "ConversationStore.GetSubConversation" - runEmbedded (runClient client) $ embed $ selectSubConversation convId subConvId + embedClient client $ selectSubConversation convId subConvId GetSubConversationGroupInfo convId subConvId -> do logEffect "ConversationStore.GetSubConversationGroupInfo" - runEmbedded (runClient client) $ embed $ selectSubConvGroupInfo convId subConvId + embedClient client $ selectSubConvGroupInfo convId subConvId GetSubConversationEpoch convId subConvId -> do logEffect "ConversationStore.GetSubConversationEpoch" - runEmbedded (runClient client) $ embed $ selectSubConvEpoch convId subConvId + embedClient client $ selectSubConvEpoch convId subConvId SetSubConversationGroupInfo convId subConvId mPgs -> do logEffect "ConversationStore.SetSubConversationGroupInfo" - runEmbedded (runClient client) $ embed $ updateSubConvGroupInfo convId subConvId mPgs + embedClient client $ updateSubConvGroupInfo convId subConvId mPgs SetSubConversationEpoch cid sconv epoch -> do logEffect "ConversationStore.SetSubConversationEpoch" - runEmbedded (runClient client) $ embed $ setEpochForSubConversation cid sconv epoch + embedClient client $ setEpochForSubConversation cid sconv epoch SetSubConversationCipherSuite cid sconv cs -> do logEffect "ConversationStore.SetSubConversationCipherSuite" - runEmbedded (runClient client) $ embed $ setCipherSuiteForSubConversation cid sconv cs + embedClient client $ setCipherSuiteForSubConversation cid sconv cs ListSubConversations cid -> do logEffect "ConversationStore.ListSubConversations" - runEmbedded (runClient client) $ embed $ listSubConversations cid + embedClient client $ listSubConversations cid DeleteSubConversation convId subConvId -> do logEffect "ConversationStore.DeleteSubConversation" - runEmbedded (runClient client) $ embed $ deleteSubConversation convId subConvId + embedClient client $ deleteSubConversation convId subConvId SearchConversations _ -> do logEffect "ConversationStore.SearchConversations" pure [] HaveRemoteConvs uids -> - runEmbedded (runClient client) $ embed $ haveRemoteConvs uids + embedClient client $ haveRemoteConvs uids interpretConversationStoreToCassandraAndPostgres :: forall r a. 
@@ -1247,7 +1247,7 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case logEffect "ConversationStore.CreateMembers" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ addMembers cid ul + False -> embedClient client $ addMembers cid ul _ -> interpretConversationStoreToPostgres (ConvStore.upsertMembers cid ul) UpsertMembersInRemoteConversation rcid uids -> do logEffect "ConversationStore.CreateMembersInRemoteConversation" @@ -1261,49 +1261,49 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case cassUids <- embedClient client $ haveRemoteConvs nonPgUids let newPgUids = filter (`notElem` cassUids) uids interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid newPgUids - runEmbedded (runClient client) $ embed $ addLocalMembersToRemoteConv rcid cassUids + embedClient client $ addLocalMembersToRemoteConv rcid cassUids CreateBotMember sr bid cid -> do logEffect "ConversationStore.CreateBotMember" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ addBotMember sr bid cid + False -> embedClient client $ addBotMember sr bid cid _ -> interpretConversationStoreToPostgres (ConvStore.createBotMember sr bid cid) GetLocalMember cid uid -> do logEffect "ConversationStore.GetLocalMember" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ member cid uid + False -> embedClient client $ member cid uid True -> interpretConversationStoreToPostgres (ConvStore.getLocalMember cid uid) GetLocalMembers cid -> do logEffect "ConversationStore.GetLocalMembers" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ members cid + False -> embedClient client $ members cid True -> interpretConversationStoreToPostgres (ConvStore.getLocalMembers cid) GetRemoteMember cid uid -> do logEffect "ConversationStore.GetRemoteMember" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid) + False -> embedClient client $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid) True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMember cid uid) GetRemoteMembers cid -> do logEffect "ConversationStore.GetRemoteMembers" withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ lookupRemoteMembers cid + False -> embedClient client $ lookupRemoteMembers cid True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMembers cid) CheckLocalMemberRemoteConv uid rcnv -> do logEffect "ConversationStore.CheckLocalMemberRemoteConv" withMigrationLock LockShared (Right uid) $ do isUserInPostgres uid >>= \case - False -> fmap (not . null) $ runEmbedded (runClient client) $ embed $ lookupLocalMemberRemoteConv uid rcnv + False -> fmap (not . 
null) $ embedClient client $ lookupLocalMemberRemoteConv uid rcnv True -> interpretConversationStoreToPostgres $ ConvStore.checkLocalMemberRemoteConv uid rcnv SelectRemoteMembers uids rcnv -> do logEffect "ConversationStore.SelectRemoteMembers" withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do filterUsersInPostgres uids >>= \pgUids -> do (pgUsers, _) <- interpretConversationStoreToPostgres $ ConvStore.selectRemoteMembers pgUids rcnv - (cassUsers, _) <- runEmbedded (runClient client) $ embed $ filterRemoteConvMembers uids rcnv + (cassUsers, _) <- embedClient client $ filterRemoteConvMembers uids rcnv let foundUsers = pgUsers <> cassUsers pure (foundUsers, Set.fromList foundUsers == Set.fromList uids) SetSelfMember qcid luid upd -> do @@ -1319,124 +1319,124 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case let (withLock, isInPG) = foldQualified luid localConvFunctions remoteConvFunctions qcid withLock $ isInPG >>= \case - False -> runEmbedded (runClient client) $ embed $ updateSelfMember qcid luid upd + False -> embedClient client $ updateSelfMember qcid luid upd True -> interpretConversationStoreToPostgres $ ConvStore.setSelfMember qcid luid upd SetOtherMember lcid quid upd -> do logEffect "ConversationStore.SetOtherMember" withMigrationLock LockShared (Left $ tUnqualified lcid) $ isConvInPostgres (tUnqualified lcid) >>= \case - False -> runEmbedded (runClient client) $ embed $ updateOtherMemberLocalConv lcid quid upd + False -> embedClient client $ updateOtherMemberLocalConv lcid quid upd True -> interpretConversationStoreToPostgres (ConvStore.setOtherMember lcid quid upd) DeleteMembers cid ul -> do logEffect "ConversationStore.DeleteMembers" withMigrationLock LockShared (Left cid) $ do -- No need to check where these are, we just delete them from both places - runEmbedded (runClient client) $ embed $ removeMembersFromLocalConv cid ul + embedClient client $ removeMembersFromLocalConv cid ul interpretConversationStoreToPostgres $ ConvStore.deleteMembers cid ul DeleteMembersInRemoteConversation rcnv uids -> do logEffect "ConversationStore.DeleteMembersInRemoteConversation" withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do -- No need to check where these are, we just delete them from both places - runEmbedded (runClient client) $ embed $ removeLocalMembersFromRemoteConv rcnv uids + embedClient client $ removeLocalMembersFromRemoteConv rcnv uids interpretConversationStoreToPostgres $ ConvStore.deleteMembersInRemoteConversation rcnv uids AddMLSClients groupId quid cs -> do logEffect "ConversationStore.AddMLSClients" cid <- groupIdToConvId groupId withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ addMLSClients groupId quid cs + False -> embedClient client $ addMLSClients groupId quid cs True -> interpretConversationStoreToPostgres (ConvStore.addMLSClients groupId quid cs) PlanClientRemoval gid clients -> do logEffect "ConversationStore.PlanClientRemoval" cid <- groupIdToConvId gid withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ planMLSClientRemoval gid clients + False -> embedClient client $ planMLSClientRemoval gid clients True -> interpretConversationStoreToPostgres (ConvStore.planClientRemoval gid clients) RemoveMLSClients gid quid cs -> do logEffect "ConversationStore.RemoveMLSClients" cid <- groupIdToConvId gid withMigrationLock LockShared (Left cid) $ do - runEmbedded (runClient 
client) $ embed $ removeMLSClients gid quid cs + embedClient client $ removeMLSClients gid quid cs interpretConversationStoreToPostgres (ConvStore.removeMLSClients gid quid cs) RemoveAllMLSClients gid -> do logEffect "ConversationStore.RemoveAllMLSClients" cid <- groupIdToConvId gid withMigrationLock LockShared (Left cid) $ do - runEmbedded (runClient client) $ embed $ removeAllMLSClients gid + embedClient client $ removeAllMLSClients gid interpretConversationStoreToPostgres (ConvStore.removeAllMLSClients gid) LookupMLSClients gid -> do logEffect "ConversationStore.LookupMLSClients" cid <- groupIdToConvId gid withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ lookupMLSClients gid + False -> embedClient client $ lookupMLSClients gid True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClients gid) LookupMLSClientLeafIndices gid -> do logEffect "ConversationStore.LookupMLSClientLeafIndices" cid <- groupIdToConvId gid withMigrationLock LockShared (Left cid) $ isConvInPostgres cid >>= \case - False -> runEmbedded (runClient client) $ embed $ lookupMLSClientLeafIndices gid + False -> embedClient client $ lookupMLSClientLeafIndices gid True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClientLeafIndices gid) UpsertSubConversation convId subConvId groupId -> do logEffect "ConversationStore.CreateSubConversation" withMigrationLock LockShared (Left convId) $ isConvInPostgres convId >>= \case - False -> runEmbedded (runClient client) $ embed $ insertSubConversation convId subConvId groupId + False -> embedClient client $ insertSubConversation convId subConvId groupId True -> interpretConversationStoreToPostgres (ConvStore.upsertSubConversation convId subConvId groupId) GetSubConversation convId subConvId -> do logEffect "ConversationStore.GetSubConversation" withMigrationLock LockShared (Left convId) $ isConvInPostgres convId >>= \case - False -> runEmbedded (runClient client) $ embed $ selectSubConversation convId subConvId + False -> embedClient client $ selectSubConversation convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversation convId subConvId GetSubConversationGroupInfo convId subConvId -> do logEffect "ConversationStore.GetSubConversationGroupInfo" withMigrationLock LockShared (Left convId) $ isConvInPostgres convId >>= \case - False -> runEmbedded (runClient client) $ embed $ selectSubConvGroupInfo convId subConvId + False -> embedClient client $ selectSubConvGroupInfo convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationGroupInfo convId subConvId GetSubConversationEpoch convId subConvId -> do logEffect "ConversationStore.GetSubConversationEpoch" withMigrationLock LockShared (Left convId) $ isConvInPostgres convId >>= \case - False -> runEmbedded (runClient client) $ embed $ selectSubConvEpoch convId subConvId + False -> embedClient client $ selectSubConvEpoch convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationEpoch convId subConvId SetSubConversationGroupInfo convId subConvId mPgs -> do logEffect "ConversationStore.SetSubConversationGroupInfo" withMigrationLock LockShared (Left convId) $ isConvInPostgres convId >>= \case - False -> runEmbedded (runClient client) $ embed $ updateSubConvGroupInfo convId subConvId mPgs + False -> embedClient client $ updateSubConvGroupInfo convId subConvId mPgs True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationGroupInfo 
convId subConvId mPgs
   SetSubConversationEpoch cid sconv epoch -> do
     logEffect "ConversationStore.SetSubConversationEpoch"
     withMigrationLock LockShared (Left cid) $
       isConvInPostgres cid >>= \case
-        False -> runEmbedded (runClient client) $ embed $ setEpochForSubConversation cid sconv epoch
+        False -> embedClient client $ setEpochForSubConversation cid sconv epoch
         True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationEpoch cid sconv epoch
   SetSubConversationCipherSuite cid sconv cs -> do
     logEffect "ConversationStore.SetSubConversationCipherSuite"
     withMigrationLock LockShared (Left cid) $
       isConvInPostgres cid >>= \case
-        False -> runEmbedded (runClient client) $ embed $ setCipherSuiteForSubConversation cid sconv cs
+        False -> embedClient client $ setCipherSuiteForSubConversation cid sconv cs
         True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationCipherSuite cid sconv cs
   ListSubConversations cid -> do
     logEffect "ConversationStore.ListSubConversations"
     withMigrationLock LockShared (Left cid) $
       isConvInPostgres cid >>= \case
-        False -> runEmbedded (runClient client) $ embed $ listSubConversations cid
+        False -> embedClient client $ listSubConversations cid
         True -> interpretConversationStoreToPostgres $ ConvStore.listSubConversations cid
   DeleteSubConversation convId subConvId -> do
     logEffect "ConversationStore.DeleteSubConversation"
     withMigrationLock LockShared (Left convId) $
       isConvInPostgres convId >>= \case
-        False -> runEmbedded (runClient client) $ embed $ deleteSubConversation convId subConvId
+        False -> embedClient client $ deleteSubConversation convId subConvId
         True -> interpretConversationStoreToPostgres $ ConvStore.deleteSubConversation convId subConvId
   HaveRemoteConvs uids -> do
     logEffect "ConversationStore.HaveRemoteConvs"
     withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do
-      remotesInCass <- runEmbedded (runClient client) $ embed $ haveRemoteConvs uids
+      remotesInCass <- embedClient client $ haveRemoteConvs uids
       remotesInPG <- interpretConversationStoreToPostgres $ ConvStore.haveRemoteConvs uids
       pure $ List.nubOrd (remotesInPG <> remotesInCass)

From 2eae57c17e463b9e7e00087a15afa82f30c13639 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 14 Oct 2025 16:26:49 +0200
Subject: [PATCH 07/49] ConversationStore.Migration: Delete obsolete stub

---
 libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
index e1a383ebe5..e83d9337da 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
@@ -285,9 +285,6 @@ saveConvToPostgres allConvData = do
             $4 :: bigint?[], $5 :: timestamptz?[], $6 :: bytea[], $7 :: bytea?[])
   |]

-deleteFromCassandra :: StoredConversation -> [SubConversation] -> Sem r ()
-deleteFromCassandra = undefined
-
 withCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => InterpreterFor ConversationStore r
 withCassandra action = do
   cstate <- input

From 970c6ffb8b2853286d9260959c3ab1da3ee38478 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 14 Oct 2025 17:13:45 +0200
Subject: [PATCH 08/49] ConversationStore.Migration: Implement function to
 migrate the remote statuses of a user
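As background for the hunk below: the Postgres writes in this patch marshal
a whole batch into a single INSERT by unzipping the rows into one list per
column and binding each list as one array parameter, which UNNEST zips back
into rows server-side; that is also why the patch defines a nine-way
`unzip9`. A toy sketch of the unzipping step, using Prelude's `unzip3` in
place of `unzip9` (the values are made up):

    rows :: [(String, String, String)]
    rows = [("u1", "a.example.com", "c1"), ("u2", "b.example.com", "c2")]

    -- One list per column; each list becomes a single array parameter.
    columns :: ([String], [String], [String])
    columns = unzip3 rows
    -- => (["u1", "u2"], ["a.example.com", "b.example.com"], ["c1", "c2"])

---
 .../src/Wire/ConversationStore/Migration.hs | 76 ++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git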
a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index e83d9337da..8b82ca8c6a 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -2,7 +2,8 @@ module Wire.ConversationStore.Migration where -import Cassandra (ClientState) +import Cassandra +import Control.Error (lastMay) import Data.Domain import Data.Id import Data.IntMap qualified as IntMap @@ -40,11 +41,15 @@ import Wire.ConversationStore.MLS.Types import Wire.ConversationStore.MigrationLock import Wire.ConversationStore.Postgres import Wire.Postgres (runTransaction) +import Wire.Sem.Paging.Cassandra import Wire.StoredConversation +import Wire.Util migrateAllConversations :: Sem r () migrateAllConversations = undefined +-- * Conversations + migrateConversation :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => ConvId -> Sem r () migrateConversation cid = do void . withMigrationLocks LockExclusive (Seconds 10) [Left cid] $ do @@ -285,7 +290,76 @@ saveConvToPostgres allConvData = do $4 :: bigint?[], $5 :: timestamptz?[], $6 :: bytea[], $7 :: bytea?[]) |] +-- * Users + +migrateUser :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => UserId -> Sem r () +migrateUser uid = do + void . withMigrationLocks LockExclusive (Seconds 10) [Right uid] $ do + statusses <- getRemoteMemberStatusFromCassandra uid + saveRemoteMemberStatusToPostgres uid statusses + deleteRemoteMemberStatusesFromCassandra uid + +getRemoteMemberStatusFromCassandra :: forall r. (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => UserId -> Sem r (Map (Remote ConvId) MemberStatus) +getRemoteMemberStatusFromCassandra uid = withCassandra $ do + convIds <- getAllRemoteConvIds [] Nothing + getRemoteConversationStatus uid convIds + where + getAllRemoteConvIds :: [Remote ConvId] -> Maybe (Remote ConvId) -> Sem (ConversationStore ': r) [Remote ConvId] + getAllRemoteConvIds acc mLastId = do + res <- getRemoteConverastionIds uid mLastId maxBound + let newAcc = res.resultSetResult <> acc + case (res.resultSetResult, res.resultSetType) of + ([], _) -> pure newAcc + (_, ResultSetTruncated) -> getAllRemoteConvIds newAcc (lastMay res.resultSetResult) + (_, ResultSetComplete) -> pure newAcc + +saveRemoteMemberStatusToPostgres :: (PGConstraints r) => UserId -> Map (Remote ConvId) MemberStatus -> Sem r () +saveRemoteMemberStatusToPostgres uid statusses = + runTransaction ReadCommitted Write $ do + Transaction.statement statusColumns insertStatuses + where + insertStatuses :: Hasql.Statement ([UserId], [Domain], [ConvId], [Maybe MutedStatus], [Maybe Text], [Bool], [Maybe Text], [Bool], [Maybe Text]) () + insertStatuses = + lmapPG @_ @(Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _, Vector _) + [resultlessStatement|INSERT INTO remote_conversation_local_member + ("user", conv_remote_domain, conv_remote_id, otr_muted_status, otr_muted_ref, otr_archived, otr_archived_ref, hidden, hidden_ref) + SELECT * + FROM UNNEST ($1 :: uuid[], $2 :: text[], $3 :: uuid[], + $4 :: integer?[], $5 :: text?[], + $6 :: bool[], $7 :: text?[], + $8 :: bool[], $9 :: text?[] + ) + |] + + statusColumns = unzip9 statusRows + + statusRows :: [(UserId, Domain, ConvId, Maybe MutedStatus, Maybe Text, Bool, Maybe Text, Bool, Maybe Text)] + 
statusRows = + Map.foldrWithKey (\rcid status -> (statusRow rcid status :)) [] statusses + + statusRow :: Remote ConvId -> MemberStatus -> (UserId, Domain, ConvId, Maybe MutedStatus, Maybe Text, Bool, Maybe Text, Bool, Maybe Text) + statusRow (tUntagged -> Qualified cid dom) MemberStatus {..} = + (uid, dom, cid, msOtrMutedStatus, msOtrMutedRef, msOtrArchived, msOtrArchivedRef, msHidden, msHiddenRef) + +deleteRemoteMemberStatusesFromCassandra :: (Member (Input ClientState) r, Member (Embed IO) r) => UserId -> Sem r () +deleteRemoteMemberStatusesFromCassandra uid = do + cstate <- input + embedClient cstate $ + retry x5 $ + write delete (params LocalQuorum (Identity uid)) + where + delete :: PrepQuery W (Identity UserId) () + delete = "delete from user_remote_conv where user = ?" + +-- * Utils + withCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => InterpreterFor ConversationStore r withCassandra action = do cstate <- input interpretConversationStoreToCassandra cstate action + +unzip9 :: [(a, b, c, d, e, f, g, h, i)] -> ([a], [b], [c], [d], [e], [f], [g], [h], [i]) +unzip9 [] = ([], [], [], [], [], [], [], [], []) +unzip9 ((y1, y2, y3, y4, y5, y6, y7, y8, y9) : ys) = + let (l1, l2, l3, l4, l5, l6, l7, l8, l9) = unzip9 ys + in (y1 : l1, y2 : l2, y3 : l3, y4 : l4, y5 : l5, y6 : l6, y7 : l7, y8 : l8, y9 : l9) From 119c5ef536d56f44ae745299d4d55a7360d84b5f Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 15 Oct 2025 14:36:14 +0200 Subject: [PATCH 09/49] ConversationStore.Migration: Add top level functions to do the migration --- .../src/Wire/ConversationStore/Migration.hs | 151 +++++++++++++++++- .../Wire/ConversationStore/MigrationLock.hs | 1 + 2 files changed, 149 insertions(+), 3 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index 8b82ca8c6a..9060c4e497 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -3,7 +3,11 @@ module Wire.ConversationStore.Migration where import Cassandra +import Cassandra.Settings hiding (pageSize) import Control.Error (lastMay) +import Data.Conduit +import Data.Conduit.Internal (zipSources) +import Data.Conduit.List qualified as C import Data.Domain import Data.Id import Data.IntMap qualified as IntMap @@ -13,6 +17,7 @@ import Data.Time import Data.Tuple.Extra import Data.Vector (Vector) import Data.Vector qualified as Vector +import Hasql.Pool qualified as Hasql import Hasql.Statement qualified as Hasql import Hasql.TH import Hasql.Transaction qualified as Transaction @@ -23,8 +28,10 @@ import Polysemy.Async import Polysemy.Conc import Polysemy.Error import Polysemy.Input +import Polysemy.State import Polysemy.Time import Polysemy.TinyLog +import System.Logger qualified as Log import Wire.API.Conversation hiding (Member) import Wire.API.Conversation.Protocol import Wire.API.Conversation.Role @@ -41,16 +48,135 @@ import Wire.ConversationStore.MLS.Types import Wire.ConversationStore.MigrationLock import Wire.ConversationStore.Postgres import Wire.Postgres (runTransaction) +import Wire.Sem.Logger.TinyLog (loggerToTinyLog) import Wire.Sem.Paging.Cassandra import Wire.StoredConversation import Wire.Util -migrateAllConversations :: Sem r () -migrateAllConversations = undefined +-- * Top level logic + +type EffectStack = [State Int, Input ClientState, Input Hasql.Pool, Async, Race, TinyLog, Embed IO, Final IO] + +migrateConvsLoop :: 
ClientState -> Hasql.Pool -> Log.Logger -> IO () +migrateConvsLoop cassClient pgPool logger = + migrationLoop cassClient pgPool logger "conversations" migrateAllConversations + +migrateUsersLoop :: ClientState -> Hasql.Pool -> Log.Logger -> IO () +migrateUsersLoop cassClient pgPool logger = + migrationLoop cassClient pgPool logger "users" migrateAllUsers + +migrationLoop :: ClientState -> Hasql.Pool -> Log.Logger -> ByteString -> ConduitT () Void (Sem EffectStack) () -> IO () +migrationLoop cassClient pgPool logger name migration = go + where + go = do + runMigration >>= \case + 0 -> Log.info logger $ Log.msg (Log.val "finished migration") + n -> do + Log.info logger $ Log.msg (Log.val "finished migration with errors") . Log.field "migration" name . Log.field "errors" n + go + + runMigration :: IO Int + runMigration = + fmap fst + . interpreter cassClient pgPool logger + $ runConduit migration + +interpreter :: ClientState -> Hasql.Pool -> Log.Logger -> Sem EffectStack a -> IO (Int, a) +interpreter cassClient pgPool logger = + runFinal + . embedToFinal + . loggerToTinyLog logger + . interpretRace + . asyncToIOFinal + . runInputConst pgPool + . runInputConst cassClient + . runState 0 + +-- * Paginated Migration + +pageSize :: Int32 +pageSize = 10000 + +migrateAllConversations :: + ( Member (Input Hasql.Pool) r, + Member (Embed IO) r, + Member (Input ClientState) r, + Member TinyLog r, + Member Async r, + Member Race r, + Member (State Int) r + ) => + ConduitM () Void (Sem r) () +migrateAllConversations = + withCount (paginateSem select (paramsP LocalQuorum () pageSize) x5) + .| logRetrievedPage + .| C.mapM_ (mapM_ (handleErrors migrateConversation "conv")) + .| C.sinkNull + where + select :: PrepQuery R () (Identity ConvId) + select = "select conv from conversation" + +migrateAllUsers :: + ( Member (Input Hasql.Pool) r, + Member (Embed IO) r, + Member (Input ClientState) r, + Member TinyLog r, + Member Async r, + Member Race r, + Member (State Int) r + ) => + ConduitM () Void (Sem r) () +migrateAllUsers = + withCount + (paginateSem select (paramsP LocalQuorum () pageSize) x5) + .| logRetrievedPage + .| C.mapM_ (mapM_ (handleErrors migrateUser "user")) + .| C.sinkNull + where + select :: PrepQuery R () (Identity UserId) + select = "select distinct user from user_remote_conv" + +logRetrievedPage :: (Member TinyLog r) => ConduitM (Int32, [Identity (Id a)]) [Id a] (Sem r) () +logRetrievedPage = + C.mapM + ( \(i, rows) -> do + let estimatedRowsSoFar = (i - 1) * pageSize + fromIntegral (length rows) + info $ Log.msg (Log.val "retrieved page") . Log.field "estimatedRowsSoFar" estimatedRowsSoFar + pure $ map runIdentity rows + ) + +withCount :: (Monad m) => ConduitM () [a] m () -> ConduitM () (Int32, [a]) m () +withCount = zipSources (C.sourceList [1 ..]) + +handleErrors :: (Member (State Int) r, Member TinyLog r) => (Id a -> Sem (Error MigrationLockError : Error Hasql.UsageError : r) b) -> ByteString -> Id a -> Sem r (Maybe b) +handleErrors action lockType id_ = + join <$> handleError (handleError action lockType) lockType id_ + +handleError :: (Member (State Int) r, Member TinyLog r, Show e) => (Id a -> Sem (Error e : r) b) -> ByteString -> Id a -> Sem r (Maybe b) +handleError action lockType id_ = do + eithErr <- runError (action id_) + case eithErr of + Right x -> pure $ Just x + Left e -> do + warn $ + Log.msg (Log.val "error occurred during migration") + . Log.field lockType (idToText id_) + . 
Log.field "error" (show e) + modify (+ 1) + pure Nothing -- * Conversations -migrateConversation :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => ConvId -> Sem r () +migrateConversation :: + ( PGConstraints r, + Member (Input ClientState) r, + Member TinyLog r, + Member Async r, + Member (Error MigrationLockError) r, + Member Race r + ) => + ConvId -> + Sem r () migrateConversation cid = do void . withMigrationLocks LockExclusive (Seconds 10) [Left cid] $ do mConvData <- getConvFromCassandra cid @@ -363,3 +489,22 @@ unzip9 [] = ([], [], [], [], [], [], [], [], []) unzip9 ((y1, y2, y3, y4, y5, y6, y7, y8, y9) : ys) = let (l1, l2, l3, l4, l5, l6, l7, l8, l9) = unzip9 ys in (y1 : l1, y2 : l2, y3 : l3, y4 : l4, y5 : l5, y6 : l6, y7 : l7, y8 : l8, y9 : l9) + +paginateSem :: forall a b q r. (Tuple a, Tuple b, RunQ q, Member (Input ClientState) r, Member (Embed IO) r) => q R a b -> QueryParams a -> RetrySettings -> ConduitT () [b] (Sem r) () +paginateSem q p r = go =<< lift getFirstPage + where + go page = do + unless (null (result page)) $ + yield (result page) + when (hasMore page) $ + go =<< lift (getNextPage page) + + getFirstPage :: Sem r (Page b) + getFirstPage = do + client <- input + embedClient client $ retry r (paginate q p) + + getNextPage :: Page b -> Sem r (Page b) + getNextPage page = do + client <- input + embedClient client $ retry r (nextPage page) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs index efa03dfe34..5c095c6ca7 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs @@ -36,6 +36,7 @@ withMigrationLock :: withMigrationLock ty key = withMigrationLocks ty (MilliSeconds 500) [key] data MigrationLockError = TimedOutAcquiringLock + deriving (Show) withMigrationLocks :: ( PGConstraints r, From 683e113df95674b7658276e2594482f8e72ce575 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 15 Oct 2025 17:22:36 +0200 Subject: [PATCH 10/49] Conversation.{Migration,Cassandra}: Ensure pending deletes from Cassandra are never read Exception: Listing local conversation ids and listing team conversation ids --- ...1-create-conversation-migration-status.sql | 5 + .../src/Wire/ConversationStore/Cassandra.hs | 145 +++++++++++------- .../src/Wire/ConversationStore/Migration.hs | 70 ++------- .../ConversationStore/Migration/Cleanup.hs | 124 +++++++++++++++ .../Wire/ConversationStore/Migration/Types.hs | 24 +++ .../Wire/ConversationStore/MigrationLock.hs | 8 - libs/wire-subsystems/wire-subsystems.cabal | 2 + 7 files changed, 255 insertions(+), 123 deletions(-) create mode 100644 libs/wire-subsystems/postgres-migrations/20251015124151-create-conversation-migration-status.sql create mode 100644 libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs create mode 100644 libs/wire-subsystems/src/Wire/ConversationStore/Migration/Types.hs diff --git a/libs/wire-subsystems/postgres-migrations/20251015124151-create-conversation-migration-status.sql b/libs/wire-subsystems/postgres-migrations/20251015124151-create-conversation-migration-status.sql new file mode 100644 index 0000000000..da2dbe2f09 --- /dev/null +++ b/libs/wire-subsystems/postgres-migrations/20251015124151-create-conversation-migration-status.sql @@ -0,0 +1,5 @@ +CREATE TABLE conversation_migration_pending_deletes ( + typ text NOT NULL, + id 
uuid NOT NULL, + PRIMARY KEY (typ, id) + ); diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index f53e550422..e12fa8a77a 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -47,7 +47,8 @@ import Polysemy import Polysemy.Async (Async) import Polysemy.Conc import Polysemy.Embed -import Polysemy.Error (Error, throw) +import Polysemy.Error (Error, runError, throw) +import Polysemy.Input import Polysemy.Time import Polysemy.TinyLog import System.Logger qualified as Log @@ -69,6 +70,7 @@ import Wire.ConversationStore.Cassandra.Instances () import Wire.ConversationStore.Cassandra.Queries qualified as Cql import Wire.ConversationStore.Cassandra.Queries qualified as Queries import Wire.ConversationStore.MLS.Types +import Wire.ConversationStore.Migration.Cleanup import Wire.ConversationStore.MigrationLock import Wire.ConversationStore.Postgres (PGConstraints, interpretConversationStoreToPostgres) import Wire.Sem.Paging.Cassandra @@ -1055,19 +1057,19 @@ interpretConversationStoreToCassandraAndPostgres :: interpretConversationStoreToCassandraAndPostgres client = interpret $ \case UpsertConversation lcnv nc -> do -- Save new convs in postgresql - withMigrationLock LockShared (Left $ tUnqualified lcnv) $ + withMigrationLockAndCleanup client LockShared (Left $ tUnqualified lcnv) $ embedClient client (getConversation (tUnqualified lcnv)) >>= \case Nothing -> interpretConversationStoreToPostgres $ ConvStore.upsertConversation lcnv nc Just _ -> embedClient client $ createConversation lcnv nc GetConversation cid -> do logEffect "ConversationStore.GetConversation" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ getConvWithPostgres cid >>= \case Nothing -> embedClient client (getConversation cid) conv -> pure conv GetConversationEpoch cid -> do logEffect "ConversationStore.GetConversationEpoch" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client (getConvEpoch cid) True -> interpretConversationStoreToPostgres $ ConvStore.getConversationEpoch cid @@ -1101,132 +1103,132 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case } GetRemoteConverastionIds uid start maxIds -> do logEffect "ConversationStore.GetRemoteConverastionIds" - withMigrationLock LockShared (Right uid) $ do + withMigrationLockAndCleanup client LockShared (Right uid) $ do isUserInPostgres uid >>= \case False -> embedClient client $ getRemoteConvIds uid start maxIds True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConverastionIds uid start maxIds GetConversationMetadata cid -> do logEffect "ConversationStore.GetConversationMetadata" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ interpretConversationStoreToPostgres (ConvStore.getConversationMetadata cid) >>= \case Nothing -> embedClient client (conversationMeta cid) meta -> pure meta GetGroupInfo cid -> do logEffect "ConversationStore.GetGroupInfo" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client (getGroupInfo cid) True -> interpretConversationStoreToPostgres (ConvStore.getGroupInfo cid) IsConversationAlive cid -> do logEffect 
"ConversationStore.IsConversationAlive" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client (isConvAlive cid) True -> interpretConversationStoreToPostgres (ConvStore.isConversationAlive cid) SelectConversations uid cids -> do logEffect "ConversationStore.SelectConversations" -- TODO: Figure out what to do about convs which could be left behind in cassandra - withMigrationLocks LockShared (Seconds 2) (Left <$> cids) $ do + withMigrationLocksAndCleanup client LockShared (Seconds 2) (Left <$> cids) $ do cassConvs <- embedClient client $ localConversationIdsOf uid cids pgConvs <- interpretConversationStoreToPostgres $ ConvStore.selectConversations uid cids pure $ List.nubOrd (pgConvs <> cassConvs) GetRemoteConversationStatus uid cids -> do logEffect "ConversationStore.GetRemoteConversationStatus" - withMigrationLock LockShared (Right uid) $ do + withMigrationLockAndCleanup client LockShared (Right uid) $ do isUserInPostgres uid >>= \case False -> embedClient client $ remoteConversationStatus uid cids True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConversationStatus uid cids SetConversationType cid ty -> do logEffect "ConversationStore.SetConversationType" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvType cid ty True -> interpretConversationStoreToPostgres (ConvStore.setConversationType cid ty) SetConversationName cid value -> do logEffect "ConversationStore.SetConversationName" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvName cid value True -> interpretConversationStoreToPostgres (ConvStore.setConversationName cid value) SetConversationAccess cid value -> do logEffect "ConversationStore.SetConversationAccess" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvAccess cid value True -> interpretConversationStoreToPostgres (ConvStore.setConversationAccess cid value) SetConversationReceiptMode cid value -> do logEffect "ConversationStore.SetConversationReceiptMode" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvReceiptMode cid value True -> interpretConversationStoreToPostgres (ConvStore.setConversationReceiptMode cid value) SetConversationMessageTimer cid value -> do logEffect "ConversationStore.SetConversationMessageTimer" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvMessageTimer cid value True -> interpretConversationStoreToPostgres (ConvStore.setConversationMessageTimer cid value) SetConversationEpoch cid epoch -> do logEffect "ConversationStore.SetConversationEpoch" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvEpoch cid epoch True -> interpretConversationStoreToPostgres (ConvStore.setConversationEpoch cid epoch) SetConversationCipherSuite cid cs -> do logEffect 
"ConversationStore.SetConversationCipherSuite" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvCipherSuite cid cs True -> interpretConversationStoreToPostgres (ConvStore.setConversationCipherSuite cid cs) SetConversationCellsState cid ps -> do logEffect "ConversationStore.SetConversationCellsState" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateConvCellsState cid ps True -> interpretConversationStoreToPostgres (ConvStore.setConversationCellsState cid ps) ResetConversation cid groupId -> do logEffect "ConversationStore.ResetConversation" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ resetConversation cid groupId True -> interpretConversationStoreToPostgres (ConvStore.resetConversation cid groupId) DeleteConversation cid -> do logEffect "ConversationStore.DeleteConversation" - withMigrationLock LockShared (Left cid) $ do + withMigrationLockAndCleanup client LockShared (Left cid) $ do embedClient client $ deleteConversation cid interpretConversationStoreToPostgres (ConvStore.deleteConversation cid) SetGroupInfo cid gib -> do logEffect "ConversationStore.SetGroupInfo" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ setGroupInfo cid gib True -> interpretConversationStoreToPostgres (ConvStore.setGroupInfo cid gib) UpdateToMixedProtocol cid groupId epoch -> do logEffect "ConversationStore.UpdateToMixedProtocol" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> updateToMixedProtocol client cid groupId epoch True -> interpretConversationStoreToPostgres (ConvStore.updateToMixedProtocol cid groupId epoch) UpdateToMLSProtocol cid -> do logEffect "ConversationStore.UpdateToMLSProtocol" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> updateToMLSProtocol client cid _ -> interpretConversationStoreToPostgres (ConvStore.updateToMLSProtocol cid) UpdateChannelAddPermissions cid cap -> do logEffect "ConversationStore.UpdateChannelAddPermissions" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ updateChannelAddPermissions cid cap _ -> interpretConversationStoreToPostgres (ConvStore.updateChannelAddPermissions cid cap) DeleteTeamConversation tid cid -> do logEffect "ConversationStore.DeleteTeamConversation" - withMigrationLock LockShared (Left cid) $ do + withMigrationLockAndCleanup client LockShared (Left cid) $ do embedClient client $ removeTeamConv tid cid interpretConversationStoreToPostgres (ConvStore.deleteTeamConversation tid cid) GetTeamConversation tid cid -> do logEffect "ConversationStore.GetTeamConversation" - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ interpretConversationStoreToPostgres (ConvStore.getTeamConversation tid cid) >>= \case Just foundCid -> pure $ Just foundCid Nothing -> embedClient client $ teamConversation tid cid @@ -1245,7 +1247,7 @@ 
interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
       interpretConversationStoreToPostgres $ ConvStore.deleteTeamConversations tid
   UpsertMembers cid ul -> do
     logEffect "ConversationStore.CreateMembers"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ addMembers cid ul
         _ -> interpretConversationStoreToPostgres (ConvStore.upsertMembers cid ul)
@@ -1253,7 +1255,7 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
     logEffect "ConversationStore.CreateMembersInRemoteConversation"
     -- Save users joining their first remote conv in postgres
-    withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do
+    withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do
       filterUsersInPostgres uids >>= \pgUids -> do
         let -- These are not in Postgres, but that doesn't mean they're in
             -- Cassandra
@@ -1264,43 +1266,43 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
       embedClient client $ addLocalMembersToRemoteConv rcid cassUids
   CreateBotMember sr bid cid -> do
     logEffect "ConversationStore.CreateBotMember"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ addBotMember sr bid cid
         _ -> interpretConversationStoreToPostgres (ConvStore.createBotMember sr bid cid)
   GetLocalMember cid uid -> do
     logEffect "ConversationStore.GetLocalMember"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ member cid uid
         True -> interpretConversationStoreToPostgres (ConvStore.getLocalMember cid uid)
   GetLocalMembers cid -> do
     logEffect "ConversationStore.GetLocalMembers"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ members cid
         True -> interpretConversationStoreToPostgres (ConvStore.getLocalMembers cid)
   GetRemoteMember cid uid -> do
     logEffect "ConversationStore.GetRemoteMember"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ lookupRemoteMember cid (tDomain uid) (tUnqualified uid)
         True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMember cid uid)
   GetRemoteMembers cid -> do
     logEffect "ConversationStore.GetRemoteMembers"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ lookupRemoteMembers cid
         True -> interpretConversationStoreToPostgres (ConvStore.getRemoteMembers cid)
   CheckLocalMemberRemoteConv uid rcnv -> do
     logEffect "ConversationStore.CheckLocalMemberRemoteConv"
-    withMigrationLock LockShared (Right uid) $ do
+    withMigrationLockAndCleanup client LockShared (Right uid) $ do
       isUserInPostgres uid >>= \case
         False -> fmap (not . 
null) $ embedClient client $ lookupLocalMemberRemoteConv uid rcnv True -> interpretConversationStoreToPostgres $ ConvStore.checkLocalMemberRemoteConv uid rcnv SelectRemoteMembers uids rcnv -> do logEffect "ConversationStore.SelectRemoteMembers" - withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do + withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do filterUsersInPostgres uids >>= \pgUids -> do (pgUsers, _) <- interpretConversationStoreToPostgres $ ConvStore.selectRemoteMembers pgUids rcnv (cassUsers, _) <- embedClient client $ filterRemoteConvMembers uids rcnv @@ -1309,11 +1311,11 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case SetSelfMember qcid luid upd -> do logEffect "ConversationStore.SetSelfMember" let localConvFunctions lcid = - ( withMigrationLock LockShared (Left (tUnqualified lcid)), + ( withMigrationLockAndCleanup client LockShared (Left (tUnqualified lcid)), isConvInPostgres (tUnqualified lcid) ) remoteConvFunctions _ = - ( withMigrationLock (LockShared) (Right (tUnqualified luid)), + ( withMigrationLockAndCleanup client (LockShared) (Right (tUnqualified luid)), isUserInPostgres (tUnqualified luid) ) let (withLock, isInPG) = foldQualified luid localConvFunctions remoteConvFunctions qcid @@ -1323,119 +1325,119 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case True -> interpretConversationStoreToPostgres $ ConvStore.setSelfMember qcid luid upd SetOtherMember lcid quid upd -> do logEffect "ConversationStore.SetOtherMember" - withMigrationLock LockShared (Left $ tUnqualified lcid) $ + withMigrationLockAndCleanup client LockShared (Left $ tUnqualified lcid) $ isConvInPostgres (tUnqualified lcid) >>= \case False -> embedClient client $ updateOtherMemberLocalConv lcid quid upd True -> interpretConversationStoreToPostgres (ConvStore.setOtherMember lcid quid upd) DeleteMembers cid ul -> do logEffect "ConversationStore.DeleteMembers" - withMigrationLock LockShared (Left cid) $ do + withMigrationLockAndCleanup client LockShared (Left cid) $ do -- No need to check where these are, we just delete them from both places embedClient client $ removeMembersFromLocalConv cid ul interpretConversationStoreToPostgres $ ConvStore.deleteMembers cid ul DeleteMembersInRemoteConversation rcnv uids -> do logEffect "ConversationStore.DeleteMembersInRemoteConversation" - withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do + withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do -- No need to check where these are, we just delete them from both places embedClient client $ removeLocalMembersFromRemoteConv rcnv uids interpretConversationStoreToPostgres $ ConvStore.deleteMembersInRemoteConversation rcnv uids AddMLSClients groupId quid cs -> do logEffect "ConversationStore.AddMLSClients" cid <- groupIdToConvId groupId - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ addMLSClients groupId quid cs True -> interpretConversationStoreToPostgres (ConvStore.addMLSClients groupId quid cs) PlanClientRemoval gid clients -> do logEffect "ConversationStore.PlanClientRemoval" cid <- groupIdToConvId gid - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ planMLSClientRemoval gid clients True -> interpretConversationStoreToPostgres (ConvStore.planClientRemoval 
gid clients) RemoveMLSClients gid quid cs -> do logEffect "ConversationStore.RemoveMLSClients" cid <- groupIdToConvId gid - withMigrationLock LockShared (Left cid) $ do + withMigrationLockAndCleanup client LockShared (Left cid) $ do embedClient client $ removeMLSClients gid quid cs interpretConversationStoreToPostgres (ConvStore.removeMLSClients gid quid cs) RemoveAllMLSClients gid -> do logEffect "ConversationStore.RemoveAllMLSClients" cid <- groupIdToConvId gid - withMigrationLock LockShared (Left cid) $ do + withMigrationLockAndCleanup client LockShared (Left cid) $ do embedClient client $ removeAllMLSClients gid interpretConversationStoreToPostgres (ConvStore.removeAllMLSClients gid) LookupMLSClients gid -> do logEffect "ConversationStore.LookupMLSClients" cid <- groupIdToConvId gid - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ lookupMLSClients gid True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClients gid) LookupMLSClientLeafIndices gid -> do logEffect "ConversationStore.LookupMLSClientLeafIndices" cid <- groupIdToConvId gid - withMigrationLock LockShared (Left cid) $ + withMigrationLockAndCleanup client LockShared (Left cid) $ isConvInPostgres cid >>= \case False -> embedClient client $ lookupMLSClientLeafIndices gid True -> interpretConversationStoreToPostgres (ConvStore.lookupMLSClientLeafIndices gid) UpsertSubConversation convId subConvId groupId -> do logEffect "ConversationStore.CreateSubConversation" - withMigrationLock LockShared (Left convId) $ + withMigrationLockAndCleanup client LockShared (Left convId) $ isConvInPostgres convId >>= \case False -> embedClient client $ insertSubConversation convId subConvId groupId True -> interpretConversationStoreToPostgres (ConvStore.upsertSubConversation convId subConvId groupId) GetSubConversation convId subConvId -> do logEffect "ConversationStore.GetSubConversation" - withMigrationLock LockShared (Left convId) $ + withMigrationLockAndCleanup client LockShared (Left convId) $ isConvInPostgres convId >>= \case False -> embedClient client $ selectSubConversation convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversation convId subConvId GetSubConversationGroupInfo convId subConvId -> do logEffect "ConversationStore.GetSubConversationGroupInfo" - withMigrationLock LockShared (Left convId) $ + withMigrationLockAndCleanup client LockShared (Left convId) $ isConvInPostgres convId >>= \case False -> embedClient client $ selectSubConvGroupInfo convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationGroupInfo convId subConvId GetSubConversationEpoch convId subConvId -> do logEffect "ConversationStore.GetSubConversationEpoch" - withMigrationLock LockShared (Left convId) $ + withMigrationLockAndCleanup client LockShared (Left convId) $ isConvInPostgres convId >>= \case False -> embedClient client $ selectSubConvEpoch convId subConvId True -> interpretConversationStoreToPostgres $ ConvStore.getSubConversationEpoch convId subConvId SetSubConversationGroupInfo convId subConvId mPgs -> do logEffect "ConversationStore.SetSubConversationGroupInfo" - withMigrationLock LockShared (Left convId) $ + withMigrationLockAndCleanup client LockShared (Left convId) $ isConvInPostgres convId >>= \case False -> embedClient client $ updateSubConvGroupInfo convId subConvId mPgs True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationGroupInfo 
convId subConvId mPgs
   SetSubConversationEpoch cid sconv epoch -> do
     logEffect "ConversationStore.SetSubConversationEpoch"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ setEpochForSubConversation cid sconv epoch
         True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationEpoch cid sconv epoch
   SetSubConversationCipherSuite cid sconv cs -> do
     logEffect "ConversationStore.SetSubConversationCipherSuite"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ setCipherSuiteForSubConversation cid sconv cs
         True -> interpretConversationStoreToPostgres $ ConvStore.setSubConversationCipherSuite cid sconv cs
   ListSubConversations cid -> do
     logEffect "ConversationStore.ListSubConversations"
-    withMigrationLock LockShared (Left cid) $
+    withMigrationLockAndCleanup client LockShared (Left cid) $
       isConvInPostgres cid >>= \case
         False -> embedClient client $ listSubConversations cid
         True -> interpretConversationStoreToPostgres $ ConvStore.listSubConversations cid
   DeleteSubConversation convId subConvId -> do
     logEffect "ConversationStore.DeleteSubConversation"
-    withMigrationLock LockShared (Left convId) $
+    withMigrationLockAndCleanup client LockShared (Left convId) $
       isConvInPostgres convId >>= \case
         False -> embedClient client $ deleteSubConversation convId subConvId
         True -> interpretConversationStoreToPostgres $ ConvStore.deleteSubConversation convId subConvId
   HaveRemoteConvs uids -> do
     logEffect "ConversationStore.HaveRemoteConvs"
-    withMigrationLocks LockShared (Seconds 2) (Right <$> uids) $ do
+    withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do
       remotesInCass <- embedClient client $ haveRemoteConvs uids
       remotesInPG <- interpretConversationStoreToPostgres $ ConvStore.haveRemoteConvs uids
       pure $ List.nubOrd (remotesInPG <> remotesInCass)
@@ -1466,3 +1468,34 @@ groupIdToConvId gid =
     Right (_, gidParts) -> pure gidParts.qConvId.qUnqualified.conv

 data MigrationError = InvalidGroupId
+
+withMigrationLockAndCleanup ::
+  (PGConstraints r, Member Async r, Member TinyLog r, Member Race r, Member (Error MigrationLockError) r) =>
+  ClientState ->
+  LockType ->
+  Either ConvId UserId ->
+  Sem r a ->
+  Sem r a
+withMigrationLockAndCleanup cassClient ty key =
+  withMigrationLocksAndCleanup cassClient ty (MilliSeconds 500) [key]
+
+withMigrationLocksAndCleanup ::
+  ( PGConstraints r,
+    Member Async r,
+    Member TinyLog r,
+    Member Race r,
+    Member (Error MigrationLockError) r,
+    TimeUnit u
+  ) =>
+  ClientState ->
+  LockType ->
+  u ->
+  [Either ConvId UserId] ->
+  Sem r a ->
+  Sem r a
+withMigrationLocksAndCleanup cassClient lockType maxWait convOrUsers action =
+  withMigrationLocks lockType maxWait convOrUsers $ do
+    interpretConversationStoreToCassandra cassClient
+      . runInputConst cassClient
+      $ cleanupIfNecessary convOrUsers
+    action
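The safety of this wrapper rests on ordering: the migration records its intent to delete from Cassandra in the same Postgres transaction that persists the conversation, so any reader holding the shared lock can finish an interrupted hand-over via cleanupIfNecessary before touching either store. A toy model of that hand-over, with IORefs standing in for the two stores (illustration only, not the real interpreters or effect stack):

    import Control.Monad (when)
    import Data.IORef

    data Stores = Stores
      { pgConv :: IORef (Maybe String), -- Postgres copy of the conversation
        cassConv :: IORef (Maybe String), -- Cassandra copy
        pendingDelete :: IORef Bool -- the conversation_migration_pending_deletes row
      }

    -- Writer side (cf. migrateConversation): persist and mark pending together,
    -- then delete from Cassandra, then clear the pending mark.
    migrate :: Stores -> IO ()
    migrate s = do
      mConv <- readIORef (cassConv s)
      case mConv of
        Nothing -> pure ()
        Just c -> do
          -- in the real code these two writes are one Postgres transaction
          writeIORef (pgConv s) (Just c)
          writeIORef (pendingDelete s) True
          writeIORef (cassConv s) Nothing -- delete from Cassandra
          writeIORef (pendingDelete s) False -- markDeletionComplete

    -- Reader side (cf. cleanupIfNecessary): a crash after the transaction but
    -- before the Cassandra delete leaves pendingDelete set; redo the tail.
    cleanupIfNeeded :: Stores -> IO ()
    cleanupIfNeeded s = do
      pending <- readIORef (pendingDelete s)
      when pending $ do
        writeIORef (cassConv s) Nothing
        writeIORef (pendingDelete s) False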
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
index 9060c4e497..08f52c24f4 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
@@ -45,9 +45,11 @@ import Wire.API.Provider.Service
 import Wire.ConversationStore
 import Wire.ConversationStore.Cassandra (interpretConversationStoreToCassandra)
 import Wire.ConversationStore.MLS.Types
+import Wire.ConversationStore.Migration.Cleanup
+import Wire.ConversationStore.Migration.Types
 import Wire.ConversationStore.MigrationLock
 import Wire.ConversationStore.Postgres
-import Wire.Postgres (runTransaction)
+import Wire.Postgres
 import Wire.Sem.Logger.TinyLog (loggerToTinyLog)
 import Wire.Sem.Paging.Cassandra
 import Wire.StoredConversation
@@ -179,54 +181,11 @@ migrateConversation ::
   Sem r ()
 migrateConversation cid = do
   void . withMigrationLocks LockExclusive (Seconds 10) [Left cid] $ do
-    mConvData <- getConvFromCassandra cid
+    mConvData <- withCassandra $ getAllConvData cid
     for_ mConvData $ \convData -> do
       saveConvToPostgres convData
-      deleteConvFromCassandra convData
-
-data ConvMLSDetails = ConvMLSDetails
-  { groupInfoData :: GroupInfoData,
-    clientMap :: ClientMap LeafIndex,
-    indexMap :: IndexMap
-  }
-
-data AllSubConvData = AllSubConvData
-  { subConv :: SubConversation,
-    groupInfoData :: Maybe GroupInfoData
-  }
-
-data AllConvData = AllConvData
-  { conv :: StoredConversation,
-    mlsDetails :: Maybe ConvMLSDetails,
-    subConvs :: [AllSubConvData]
-  }
-
-getConvFromCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => ConvId -> Sem r (Maybe AllConvData)
-getConvFromCassandra cid = withCassandra $ do
-  getConversation cid >>= \case
-    Nothing -> pure Nothing
-    Just conv -> do
-      subConvMlsData <- listSubConversations cid
-      mGroupInfo <- getGroupInfo cid
-      mlsLeafIndices <- case mlsMetadata conv of
-        Nothing -> pure Nothing
-        Just (mlsData, _) -> do
-          (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId
-          pure $ Just (cm, im)
-      let mlsDetails = ConvMLSDetails <$> mGroupInfo <*> fmap fst mlsLeafIndices <*> fmap snd mlsLeafIndices
-      subConvs <- fmap Map.elems $ flip Map.traverseWithKey subConvMlsData $ \subConvId mlsData -> do
-        (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId
-        let subconv =
-              SubConversation
-                { scParentConvId = cid,
-                  scSubConvId = subConvId,
-                  scMLSData = mlsData,
-                  scMembers = cm,
-                  scIndexMap = im
-                }
-        gi <- getSubConversationGroupInfo cid subConvId
-        pure $ AllSubConvData subconv gi
-      pure . 
Just $ AllConvData {..} + withCassandra $ deleteConv convData + markDeletionComplete DeleteConv cid deleteConvFromCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => AllConvData -> Sem r () deleteConvFromCassandra allConvData = withCassandra $ do @@ -273,6 +232,7 @@ saveConvToPostgres allConvData = do Transaction.statement remoteMemberColumns insertRemoteMembers Transaction.statement subConvColumns insertSubConvs Transaction.statement mlsClientColumns insertMLSClients + Transaction.statement (DeleteConv, storedConv.id_) markDeletionPendingStmt where storedConv = allConvData.conv -- In all these queries we do nothing on conflict because if the data is in @@ -420,10 +380,11 @@ saveConvToPostgres allConvData = do migrateUser :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => UserId -> Sem r () migrateUser uid = do - void . withMigrationLocks LockExclusive (Seconds 10) [Right uid] $ do + withMigrationLocks LockExclusive (Seconds 10) [Right uid] $ do statusses <- getRemoteMemberStatusFromCassandra uid saveRemoteMemberStatusToPostgres uid statusses deleteRemoteMemberStatusesFromCassandra uid + markDeletionComplete DeleteUser uid getRemoteMemberStatusFromCassandra :: forall r. (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => UserId -> Sem r (Map (Remote ConvId) MemberStatus) getRemoteMemberStatusFromCassandra uid = withCassandra $ do @@ -443,6 +404,7 @@ saveRemoteMemberStatusToPostgres :: (PGConstraints r) => UserId -> Map (Remote C saveRemoteMemberStatusToPostgres uid statusses = runTransaction ReadCommitted Write $ do Transaction.statement statusColumns insertStatuses + Transaction.statement (DeleteUser, uid) markDeletionPendingStmt where insertStatuses :: Hasql.Statement ([UserId], [Domain], [ConvId], [Maybe MutedStatus], [Maybe Text], [Bool], [Maybe Text], [Bool], [Maybe Text]) () insertStatuses = @@ -467,17 +429,7 @@ saveRemoteMemberStatusToPostgres uid statusses = statusRow (tUntagged -> Qualified cid dom) MemberStatus {..} = (uid, dom, cid, msOtrMutedStatus, msOtrMutedRef, msOtrArchived, msOtrArchivedRef, msHidden, msHiddenRef) -deleteRemoteMemberStatusesFromCassandra :: (Member (Input ClientState) r, Member (Embed IO) r) => UserId -> Sem r () -deleteRemoteMemberStatusesFromCassandra uid = do - cstate <- input - embedClient cstate $ - retry x5 $ - write delete (params LocalQuorum (Identity uid)) - where - delete :: PrepQuery W (Identity UserId) () - delete = "delete from user_remote_conv where user = ?" 
- --- * Utils +-- * Other helpers withCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => InterpreterFor ConversationStore r withCassandra action = do diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs new file mode 100644 index 0000000000..875a9742c0 --- /dev/null +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs @@ -0,0 +1,124 @@ +{-# LANGUAGE RecordWildCards #-} + +module Wire.ConversationStore.Migration.Cleanup where + +import Cassandra +import Data.Id +import Data.Map qualified as Map +import Hasql.Statement qualified as Hasql +import Hasql.TH +import Imports +import Polysemy +import Polysemy.Input +import Wire.API.Conversation hiding (Member) +import Wire.API.Conversation.Protocol +import Wire.API.PostgresMarshall +import Wire.ConversationStore +import Wire.ConversationStore.MLS.Types +import Wire.ConversationStore.Migration.Types +import Wire.ConversationStore.Postgres +import Wire.Postgres +import Wire.StoredConversation +import Wire.Util + +data DeletionType = DeleteConv | DeleteUser + +instance PostgresMarshall DeletionType Text where + postgresMarshall = \case + DeleteConv -> "conv" + DeleteUser -> "user" + +markDeletionPendingStmt :: Hasql.Statement (DeletionType, Id a) () +markDeletionPendingStmt = + lmapPG + [resultlessStatement|INSERT INTO conversation_migration_pending_deletes + (typ, id) + VALUES ($1 :: text, $2 :: uuid) + ON CONFLICT DO NOTHING + |] + +markDeletionComplete :: (PGConstraints r) => DeletionType -> Id a -> Sem r () +markDeletionComplete typ id_ = runStatement (typ, id_) delete + where + delete :: Hasql.Statement (DeletionType, Id a) () + delete = + lmapPG + [resultlessStatement|DELETE FROM conversation_migration_pending_deletes + WHERE typ = $1 :: text AND id = $2 :: uuid + |] + +getAllConvData :: (Member ConversationStore r) => ConvId -> Sem r (Maybe AllConvData) +getAllConvData cid = do + getConversation cid >>= \case + Nothing -> pure Nothing + Just conv -> do + subConvMlsData <- listSubConversations cid + mGroupInfo <- getGroupInfo cid + mlsLeafIndices <- case mlsMetadata conv of + Nothing -> pure Nothing + Just (mlsData, _) -> do + (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId + pure $ Just (cm, im) + let mlsDetails = ConvMLSDetails <$> mGroupInfo <*> fmap fst mlsLeafIndices <*> fmap snd mlsLeafIndices + subConvs <- fmap Map.elems $ flip Map.traverseWithKey subConvMlsData $ \subConvId mlsData -> do + (cm, im) <- lookupMLSClientLeafIndices mlsData.cnvmlsGroupId + let subconv = + SubConversation + { scParentConvId = cid, + scSubConvId = subConvId, + scMLSData = mlsData, + scMembers = cm, + scIndexMap = im + } + gi <- getSubConversationGroupInfo cid subConvId + pure $ AllSubConvData subconv gi + pure . 
Just $ AllConvData {..}
+
+deleteConv :: (Member ConversationStore r) => AllConvData -> Sem r ()
+deleteConv allConvData = do
+  for_ allConvData.subConvs $ \subConvData -> do
+    removeAllMLSClients subConvData.subConv.scMLSData.cnvmlsGroupId
+    deleteSubConversation allConvData.conv.id_ subConvData.subConv.scSubConvId
+
+  for_ (getMLSData allConvData.conv.protocol) $ \mlsData ->
+    removeAllMLSClients mlsData.cnvmlsGroupId
+
+  case allConvData.conv.metadata.cnvmTeam of
+    Nothing -> deleteConversation allConvData.conv.id_
+    Just tid -> deleteTeamConversation tid allConvData.conv.id_
+
+deleteRemoteMemberStatusesFromCassandra :: (Member (Input ClientState) r, Member (Embed IO) r) => UserId -> Sem r ()
+deleteRemoteMemberStatusesFromCassandra uid = do
+  cstate <- input
+  embedClient cstate $
+    retry x5 $
+      write delete (params LocalQuorum (Identity uid))
+  where
+    delete :: PrepQuery W (Identity UserId) ()
+    delete = "delete from user_remote_conv where user = ?"
+
+cleanupIfNecessary :: (PGConstraints r, Member (Input ClientState) r, Member ConversationStore r) => [Either ConvId UserId] -> Sem r ()
+cleanupIfNecessary = mapM_ (either cleanupConvIfNecessary cleanupUserIfNecessary)
+
+cleanupUserIfNecessary :: (PGConstraints r, Member (Input ClientState) r) => UserId -> Sem r ()
+cleanupUserIfNecessary uid =
+  whenM (isPendingDelete DeleteUser uid) $ do
+    deleteRemoteMemberStatusesFromCassandra uid
+    markDeletionComplete DeleteUser uid
+
+cleanupConvIfNecessary :: (PGConstraints r, Member ConversationStore r) => ConvId -> Sem r ()
+cleanupConvIfNecessary cid =
+  whenM (isPendingDelete DeleteConv cid) $ do
+    maybe (pure ()) deleteConv =<< getAllConvData cid
+    markDeletionComplete DeleteConv cid
+
+isPendingDelete :: (PGConstraints r) => DeletionType -> Id a -> Sem r Bool
+isPendingDelete typ id_ = runStatement (typ, id_) select
+  where
+    select =
+      lmapPG
+        [singletonStatement|SELECT EXISTS (SELECT 1
+                   FROM conversation_migration_pending_deletes
+                   WHERE typ = $1 :: text AND id = $2 :: uuid
+                 ) :: boolean
+        |]
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Types.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Types.hs
new file mode 100644
index 0000000000..d1f477188f
--- /dev/null
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Types.hs
@@ -0,0 +1,24 @@
+module Wire.ConversationStore.Migration.Types where
+
+import Imports
+import Wire.API.MLS.GroupInfo
+import Wire.API.MLS.LeafNode
+import Wire.ConversationStore.MLS.Types
+import Wire.StoredConversation
+
+data ConvMLSDetails = ConvMLSDetails
+  { groupInfoData :: GroupInfoData,
+    clientMap :: ClientMap LeafIndex,
+    indexMap :: IndexMap
+  }
+
+data AllSubConvData = AllSubConvData
+  { subConv :: SubConversation,
+    groupInfoData :: Maybe GroupInfoData
+  }
+
+data AllConvData = AllConvData
+  { conv :: StoredConversation,
+    mlsDetails :: Maybe ConvMLSDetails,
+    subConvs :: [AllSubConvData]
+  }
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs
index 5c095c6ca7..d77cd0e987 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs
@@ -27,14 +27,6 @@ data LockType
   | -- | Used for reading and writing to Cassandra, will block exclusive locks
     LockShared
-withMigrationLock ::
-  (PGConstraints r, Member Async r, Member TinyLog r, Member Race r, Member (Error MigrationLockError) r) =>
-  LockType ->
-  Either ConvId UserId ->
-  Sem r a ->
-  Sem r a
-withMigrationLock ty key = withMigrationLocks ty (MilliSeconds 500) [key]
-
 data MigrationLockError = TimedOutAcquiringLock
   deriving (Show)
diff --git a/libs/wire-subsystems/wire-subsystems.cabal b/libs/wire-subsystems/wire-subsystems.cabal
index 45f6c041da..cdedc4e0f4 100644
--- a/libs/wire-subsystems/wire-subsystems.cabal
+++ b/libs/wire-subsystems/wire-subsystems.cabal
@@ -192,6 +192,8 @@ library
     Wire.ConversationStore.Cassandra.Instances
     Wire.ConversationStore.Cassandra.Queries
     Wire.ConversationStore.Migration
+    Wire.ConversationStore.Migration.Cleanup
+    Wire.ConversationStore.Migration.Types
     Wire.ConversationStore.MigrationLock
     Wire.ConversationStore.MLS.Types
     Wire.ConversationStore.Postgres

From db927babca1e26f733e5485e3f9c6a26c0a3580d Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 16 Oct 2025 10:59:57 +0200
Subject: [PATCH 11/49] ConversationStore.Cassandra: Document limitation in
 listing conv ids

---
 .../src/Wire/ConversationStore/Cassandra.hs   | 41 ++++++++++++-------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index e12fa8a77a..7eb570eb50 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -1075,17 +1075,32 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
         True -> interpretConversationStoreToPostgres $ ConvStore.getConversationEpoch cid
   GetConversations cids -> do
     logEffect "ConversationStore.GetConversations"
-    let indexByConvId = foldr (\storedConv -> Map.insert storedConv.id_ storedConv) Map.empty
-    -- Important to read Cassandra first, otherwise we could miss a conv which
-    -- got migrated while we were reading Postgres
-    cassConvs <- indexByConvId <$> localConversations client cids
-    pgConvs <- indexByConvId <$> interpretConversationStoreToPostgres (ConvStore.getConversations cids)
-    pure $ mapMaybe (\cid -> Map.lookup cid pgConvs <|> Map.lookup cid cassConvs) cids
+    withMigrationLocksAndCleanup client LockShared (Seconds 2) (Left <$> cids) $ do
+      let indexByConvId = foldr (\storedConv -> Map.insert storedConv.id_ storedConv) Map.empty
+      cassConvs <- indexByConvId <$> localConversations client cids
+      pgConvs <- indexByConvId <$> interpretConversationStoreToPostgres (ConvStore.getConversations cids)
+      pure $ mapMaybe (\cid -> Map.lookup cid pgConvs <|> Map.lookup cid cassConvs) cids
   GetLocalConversationIds uid start maxIds -> do
     logEffect "ConversationStore.GetLocalConversationIds"
-    -- Important to read Cassandra first, otherwise we could miss a conv which
-    -- got migrated while we were reading Postgres
+    -- [Migration Locking Limitation]
+    --
+    -- Here we cannot acquire any locks because we do not have convIds to start
+    -- with. This could cause consistency problems in two ways:
+    -- 1. A duplicate convId could be retrieved from PG
+    -- 2. We could read a conv which was deleted (or which the user was removed
+    --    from) in Postgresql after being migrated, but which the migration
+    --    left behind in Cassandra.
+    --
+    -- 1. is solved by de-duplicating the list below.
+    -- 2. is not solved here. When any other action is attempted on this
+    --    ConvId, it should get cleaned up from Cassandra before that action,
+    --    so it _should_ be fine to return it here. But strictly speaking this
+    --    is inconsistent behaviour.
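+    -- (Concretely: a conv that is migrated between the two reads below shows
+    -- up in both cassConvIds and pgConvIds, which is case 1; a conv deleted
+    -- from Postgres but left behind in Cassandra keeps showing up here, which
+    -- is case 2.)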
+ -- + -- A solution could be to keep looping and locking until we get to a stable + -- situation, but that could run into creating too many sessions with + -- Postgres cassConvIds <- embedClient client $ getLocalConvIds uid start maxIds pgConvIds <- interpretConversationStoreToPostgres $ ConvStore.getLocalConversationIds uid start maxIds @@ -1127,7 +1142,6 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case True -> interpretConversationStoreToPostgres (ConvStore.isConversationAlive cid) SelectConversations uid cids -> do logEffect "ConversationStore.SelectConversations" - -- TODO: Figure out what to do about convs which could be left behind in cassandra withMigrationLocksAndCleanup client LockShared (Seconds 2) (Left <$> cids) $ do cassConvs <- embedClient client $ localConversationIdsOf uid cids pgConvs <- interpretConversationStoreToPostgres $ ConvStore.selectConversations uid cids @@ -1234,10 +1248,7 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case Nothing -> embedClient client $ teamConversation tid cid GetTeamConversations tid -> do logEffect "ConversationStore.GetTeamConversations" - -- TODO: This could return some deleted conversations if they get left - -- behind in cassandra while migration and then deleted from postgresql. - -- - -- Figure out a way to deal with this. + -- See [Migration Locking Limitation] cassConvs <- embedClient client $ getTeamConversations tid pgConvs <- interpretConversationStoreToPostgres $ ConvStore.getTeamConversations tid pure $ List.nubOrd (pgConvs <> cassConvs) @@ -1261,8 +1272,8 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case -- cassandra nonPgUids = filter (`notElem` pgUids) uids cassUids <- embedClient client $ haveRemoteConvs nonPgUids - let newPgUids = filter (`notElem` cassUids) uids - interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid newPgUids + let nonCassUids = filter (`notElem` cassUids) uids + interpretConversationStoreToPostgres $ ConvStore.upsertMembersInRemoteConversation rcid nonCassUids embedClient client $ addLocalMembersToRemoteConv rcid cassUids CreateBotMember sr bid cid -> do logEffect "ConversationStore.CreateBotMember" From 810a33998b21dda70829beff38598a872ec84b99 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 16 Oct 2025 14:42:42 +0200 Subject: [PATCH 12/49] galley: Allow choosing migration interpreter for ConversationStore --- .../src/Wire/ConversationStore/Cassandra.hs | 25 +++++++++++++------ services/galley/default.nix | 2 ++ services/galley/galley.cabal | 1 + services/galley/src/Galley/App.hs | 6 +++++ services/galley/src/Galley/Options.hs | 12 ++++++++- 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index 7eb570eb50..c7d9228f52 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -43,11 +43,14 @@ import Data.Range import Data.Set qualified as Set import Data.Time import Imports +import Network.HTTP.Types.Status (status500) +import Network.Wai.Utilities.Error qualified as WaiError +import Network.Wai.Utilities.JSONResponse import Polysemy import Polysemy.Async (Async) import Polysemy.Conc import Polysemy.Embed -import Polysemy.Error (Error, runError, throw) +import Polysemy.Error (Error, mapError, throw) import Polysemy.Input import Polysemy.Time import 
Polysemy.TinyLog @@ -57,6 +60,7 @@ import Wire.API.Conversation hiding (Conversation, Member, members, newGroupId) import Wire.API.Conversation.CellsState import Wire.API.Conversation.Protocol import Wire.API.Conversation.Role hiding (DeleteConversation) +import Wire.API.Error import Wire.API.MLS.CipherSuite import Wire.API.MLS.Credential import Wire.API.MLS.Group.Serialisation @@ -1048,7 +1052,6 @@ interpretConversationStoreToCassandraAndPostgres :: PGConstraints r, Member Async r, Member (Error MigrationError) r, - Member (Error MigrationLockError) r, Member Race r ) => ClientState -> @@ -1478,14 +1481,20 @@ groupIdToConvId gid = Left _ -> throw InvalidGroupId Right (_, gidParts) -> pure gidParts.qConvId.qUnqualified.conv -data MigrationError = InvalidGroupId +data MigrationError + = InvalidGroupId + | FailedToAcquireMigrationLock MigrationLockError + deriving (Show) + +instance APIError MigrationError where + toResponse _ = waiErrorToJSONResponse $ WaiError.mkError status500 "internal-server-error" "Internal Server Error" withMigrationLockAndCleanup :: - (PGConstraints r, Member Async r, Member TinyLog r, Member Race r, Member (Error MigrationLockError) r) => + (PGConstraints r, Member Async r, Member TinyLog r, Member Race r, Member (Error MigrationError) r) => ClientState -> LockType -> Either ConvId UserId -> - Sem r a -> + Sem (Error MigrationLockError : r) a -> Sem r a withMigrationLockAndCleanup cassClient ty key = withMigrationLocksAndCleanup cassClient ty (MilliSeconds 500) [key] @@ -1495,17 +1504,17 @@ withMigrationLocksAndCleanup :: Member Async r, Member TinyLog r, Member Race r, - Member (Error MigrationLockError) r, + Member (Error MigrationError) r, TimeUnit u ) => ClientState -> LockType -> u -> [Either ConvId UserId] -> - Sem r a -> + Sem (Error MigrationLockError : r) a -> Sem r a withMigrationLocksAndCleanup cassClient lockType maxWait convOrUsers action = - withMigrationLocks lockType maxWait convOrUsers $ do + mapError FailedToAcquireMigrationLock . withMigrationLocks lockType maxWait convOrUsers $ do interpretConversationStoreToCassandra cassClient . 
runInputConst cassClient
+      $ cleanupIfNecessary convOrUsers
+    action
diff --git a/services/galley/default.nix b/services/galley/default.nix
index e23c6edb98..a8198ca5dd 100644
--- a/services/galley/default.nix
+++ b/services/galley/default.nix
@@ -67,6 +67,7 @@
 , optparse-applicative
 , pem
 , polysemy
+, polysemy-conc
 , polysemy-wire-zoo
 , process
 , prometheus-client
@@ -178,6 +179,7 @@ mkDerivation {
     optparse-applicative
     pem
     polysemy
+    polysemy-conc
     polysemy-wire-zoo
     prometheus-client
     proto-lens
diff --git a/services/galley/galley.cabal b/services/galley/galley.cabal
index 727c8b4d7c..be353a9847 100644
--- a/services/galley/galley.cabal
+++ b/services/galley/galley.cabal
@@ -322,6 +322,7 @@ library
     , optparse-applicative
     , pem
     , polysemy
+    , polysemy-conc
     , polysemy-wire-zoo
     , prometheus-client
     , proto-lens >=0.2
diff --git a/services/galley/src/Galley/App.hs b/services/galley/src/Galley/App.hs
index 09a5cfbf76..e391feaf3d 100644
--- a/services/galley/src/Galley/App.hs
+++ b/services/galley/src/Galley/App.hs
@@ -90,6 +90,7 @@ import Network.Wai.Utilities.JSONResponse
 import OpenSSL.Session as Ssl
 import Polysemy
 import Polysemy.Async
+import Polysemy.Conc
 import Polysemy.Error
 import Polysemy.Fail
 import Polysemy.Input
@@ -130,6 +131,7 @@ type GalleyEffects0 =
   '[ Input ClientState,
      Input Hasql.Pool,
      Input Env,
+     Error MigrationError,
      Error InvalidInput,
      Error ParseException,
      Error InternalError,
@@ -139,6 +141,7 @@ type GalleyEffects0 =
      Error TeamCollaboratorsError,
      Error Hasql.UsageError,
      Error HttpError,
+     Race,
      Async,
      Delay,
      Fail,
@@ -266,6 +269,7 @@ evalGalley e =
   let convStoreInterpreter =
         case (e ^. options . postgresMigration).conversation of
           CassandraStorage -> interpretConversationStoreToCassandra (e ^. cstate)
+          MigrationToPostgresql -> interpretConversationStoreToCassandraAndPostgres (e ^. cstate)
           PostgresqlStorage -> interpretConversationStoreToPostgres
    in ExceptT
        . runFinal @IO
@@ -276,6 +280,7 @@ evalGalley e =
        . failToEmbed @IO
        . runDelay
        . asyncToIOFinal
+       . interpretRace
        . mapError httpErrorToJSONResponse
        . logAndMapError postgresUsageErrorToHttpError (Text.pack . show) "postgres usage error"
        . mapError teamCollaboratorsSubsystemErrorToHttpError
@@ -283,6 +288,7 @@ evalGalley e =
        . mapError toResponse
        . mapError toResponse
        . mapError toResponse
+       . logAndMapError toResponse (Text.pack . show) "migration error"
        . runInputConst e
        . runInputConst (e ^. hasqlPool)
        . runInputConst (e ^. cstate)
diff --git a/services/galley/src/Galley/Options.hs b/services/galley/src/Galley/Options.hs
index bd5173f967..6c72a7dbc9 100644
--- a/services/galley/src/Galley/Options.hs
+++ b/services/galley/src/Galley/Options.hs
@@ -188,11 +188,21 @@ deriveFromJSON toOptionFieldName ''JournalOpts
 makeLenses ''JournalOpts

-data StorageLocation = CassandraStorage | PostgresqlStorage
+data StorageLocation
+  = -- | Use when solely using Cassandra
+    CassandraStorage
+  | -- | Use while migrating to Postgresql. Using this option does not trigger
+    -- the migration. Newly created conversations are stored in Postgresql.
+    -- Once this has been turned on, it MUST NOT be set back to
+    -- CassandraStorage ever again.
+    MigrationToPostgresql
+  | -- | Use after migrating to Postgresql
+    PostgresqlStorage

 instance FromJSON StorageLocation where
   parseJSON =
     withText "StorageLocation" $ \case
       "cassandra" -> pure CassandraStorage
+      "migration-to-postgresql" -> pure MigrationToPostgresql
       "postgresql" -> pure PostgresqlStorage
       x -> fail $ "Invalid storage location: " <> Text.unpack x <> ". Valid options: cassandra, migration-to-postgresql, postgresql"
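The three constructors map onto a staged rollout of galley's configuration. A minimal sketch of the relevant galley settings at each stage (the `settings.postgresMigration.conversation` key path is assumed from the option's use in `evalGalley` above and the chart notes further below):

    # Stage 1: all conversation data still in Cassandra (the default)
    settings:
      postgresMigration:
        conversation: cassandra

    # Stage 2: dual-store interpreter; set this before enabling
    # migrateConversations in background-worker
    settings:
      postgresMigration:
        conversation: migration-to-postgresql

    # Stage 3: only after the migration has fully completed
    settings:
      postgresMigration:
        conversation: postgresql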
From bb6d7bc7266b57e651583411b2bbfdcdd75e07cd Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 21 Oct 2025 10:29:13 +0200
Subject: [PATCH 13/49] background-worker: Integrate with ConvSubsystem to
 allow migrating convs to postgres

---
 .../background-worker/templates/_helpers.tpl  | 16 +++++++++
 .../templates/cassandra-galley-secret.yaml    | 15 +++++++++
 .../templates/configmap.yaml                  | 20 +++++++++++
 .../templates/deployment.yaml                 | 16 +++++++++
 .../background-worker/templates/secret.yaml   |  3 ++
 charts/background-worker/values.yaml          | 33 +++++++++++++++++++
 .../src/Wire/ConversationStore/Migration.hs   | 18 +++++++---
 .../background-worker/background-worker.cabal |  3 ++
 .../background-worker.integration.yaml        | 21 ++++++++++++
 .../src/Wire/BackgroundWorker.hs              | 23 +++++++++----
 .../src/Wire/BackgroundWorker/Env.hs          | 16 +++++++--
 .../src/Wire/BackgroundWorker/Options.hs      | 10 +++++-
 .../src/Wire/MigrateConversations.hs          | 18 ++++++++++
 13 files changed, 197 insertions(+), 15 deletions(-)
 create mode 100644 charts/background-worker/templates/cassandra-galley-secret.yaml
 create mode 100644 services/background-worker/src/Wire/MigrateConversations.hs

diff --git a/charts/background-worker/templates/_helpers.tpl b/charts/background-worker/templates/_helpers.tpl
index 96bf8dd1b8..be9800f20a 100644
--- a/charts/background-worker/templates/_helpers.tpl
+++ b/charts/background-worker/templates/_helpers.tpl
@@ -23,3 +23,19 @@ created one (in case the CA is provided as PEM string.)
 {{- dict "name" "background-worker-cassandra" "key" "ca.pem" | toYaml -}}
 {{- end -}}
 {{- end -}}
+
+{{- define "useCassandraTLSGalley" -}}
+{{ or (hasKey .cassandraGalley "tlsCa") (hasKey .cassandraGalley "tlsCaSecretRef") }}
+{{- end -}}
+
+{{/* Return a Dict of TLS CA secret name and key
+This is used to switch between provided secret (e.g. by cert-manager) and
+created one (in case the CA is provided as PEM string.)
+*/}}
+{{- define "tlsSecretRefGalley" -}}
+{{- if .cassandraGalley.tlsCaSecretRef -}}
+{{ .cassandraGalley.tlsCaSecretRef | toYaml }}
+{{- else }}
+{{- dict "name" "background-worker-cassandra-galley" "key" "ca.pem" | toYaml -}}
+{{- end -}}
+{{- end -}}
diff --git a/charts/background-worker/templates/cassandra-galley-secret.yaml b/charts/background-worker/templates/cassandra-galley-secret.yaml
new file mode 100644
index 0000000000..34d0164d1f
--- /dev/null
+++ b/charts/background-worker/templates/cassandra-galley-secret.yaml
@@ -0,0 +1,15 @@
+{{/* Secret for the provided Cassandra TLS CA. */}}
+{{- if not (empty .Values.config.cassandraGalley.tlsCa) }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: background-worker-cassandra-galley
+  labels:
+    app: background-worker
+    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    release: "{{ .Release.Name }}"
+    heritage: "{{ .Release.Service }}"
+type: Opaque
+data:
+  ca.pem: {{ .Values.config.cassandraGalley.tlsCa | b64enc | quote }}
+{{- end }}
diff --git a/charts/background-worker/templates/configmap.yaml b/charts/background-worker/templates/configmap.yaml
index 1a0e37e609..6f2e6bd59d 100644
--- a/charts/background-worker/templates/configmap.yaml
+++ b/charts/background-worker/templates/configmap.yaml
@@ -30,6 +30,24 @@ data:
       tlsCa: /etc/wire/background-worker/cassandra/{{- (include "tlsSecretRef" . 
| fromYaml).key }} {{- end }} + cassandraGalley: + endpoint: + host: {{ .cassandraGalley.host }} + port: 9042 + keyspace: galley + {{- if hasKey .cassandraGalley "filterNodesByDatacentre" }} + filterNodesByDatacentre: {{ .cassandraGalley.filterNodesByDatacentre }} + {{- end }} + {{- if eq (include "useCassandraTLSGalley" .) "true" }} + tlsCa: /etc/wire/background-worker/cassandra-galley/{{- (include "tlsSecretRefGalley" . | fromYaml).key }} + {{- end }} + + postgresql: {{ toYaml .postgresql | nindent 6 }} + postgresqlPool: {{ toYaml .postgresqlPool | nindent 6 }} + {{- if hasKey $.Values.secrets "pgPassword" }} + postgresqlPassword: /etc/wire/background-worker/secrets/pgPassword + {{- end }} + {{- with .rabbitmq }} rabbitmq: host: {{ .host }} @@ -46,6 +64,8 @@ data: {{- end }} {{- end }} + migrateConversations: {{ .migrateConversations }} + backendNotificationPusher: {{toYaml .backendNotificationPusher | indent 6 }} {{- end }} diff --git a/charts/background-worker/templates/deployment.yaml b/charts/background-worker/templates/deployment.yaml index aeeab1ecc5..5f5859b461 100644 --- a/charts/background-worker/templates/deployment.yaml +++ b/charts/background-worker/templates/deployment.yaml @@ -27,6 +27,7 @@ spec: checksum/configmap: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum }} checksum/secret: {{ include (print .Template.BasePath "/secret.yaml") . | sha256sum }} checksum/cassandra-secret: {{ include (print .Template.BasePath "/cassandra-secret.yaml") . | sha256sum }} + checksum/cassandra-galley-secret: {{ include (print .Template.BasePath "/cassandra-galley-secret.yaml") . | sha256sum }} fluentbit.io/parser: json spec: serviceAccount: null @@ -44,11 +45,19 @@ spec: secret: secretName: {{ (include "tlsSecretRef" .Values.config | fromYaml).name }} {{- end }} + {{- if eq (include "useCassandraTLSGalley" .Values.config) "true" }} + - name: "background-worker-cassandra-galley" + secret: + secretName: {{ (include "tlsSecretRefGalley" .Values.config | fromYaml).name }} + {{- end }} {{- if .Values.config.rabbitmq.tlsCaSecretRef }} - name: "rabbitmq-ca" secret: secretName: {{ .Values.config.rabbitmq.tlsCaSecretRef.name }} {{- end }} + {{- if .Values.additionalVolumes }} + {{ toYaml .Values.additionalVolumes | nindent 8 }} + {{- end }} containers: - name: background-worker image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" @@ -64,10 +73,17 @@ spec: - name: "background-worker-cassandra" mountPath: "/etc/wire/background-worker/cassandra" {{- end }} + {{- if eq (include "useCassandraTLSGalley" .Values.config) "true" }} + - name: "background-worker-cassandra-galley" + mountPath: "/etc/wire/background-worker/cassandra-galley" + {{- end }} {{- if .Values.config.rabbitmq.tlsCaSecretRef }} - name: "rabbitmq-ca" mountPath: "/etc/wire/background-worker/rabbitmq-ca/" {{- end }} + {{- if .Values.additionalVolumeMounts }} + {{ toYaml .Values.additionalVolumeMounts | nindent 10 }} + {{- end }} env: - name: RABBITMQ_USERNAME valueFrom: diff --git a/charts/background-worker/templates/secret.yaml b/charts/background-worker/templates/secret.yaml index 25a22ce67e..dfde355db9 100644 --- a/charts/background-worker/templates/secret.yaml +++ b/charts/background-worker/templates/secret.yaml @@ -15,4 +15,7 @@ data: {{- with .Values.secrets }} rabbitmqUsername: {{ .rabbitmq.username | b64enc | quote }} rabbitmqPassword: {{ .rabbitmq.password | b64enc | quote }} + {{- if .pgPassword }} + pgPassword: {{ .pgPassword | b64enc | quote }} + {{- end }} {{- end }} diff --git 
a/charts/background-worker/values.yaml b/charts/background-worker/values.yaml
index a0117c9363..9b232afc57 100644
--- a/charts/background-worker/values.yaml
+++ b/charts/background-worker/values.yaml
@@ -31,6 +31,39 @@ config:
   #     key:
   cassandra:
     host: aws-cassandra
+  cassandraGalley:
+    host: aws-cassandra
+
+  # Postgres connection settings
+  #
+  # Values are described in https://www.postgresql.org/docs/17/libpq-connect.html#LIBPQ-PARAMKEYWORDS
+  # To set the password via a secret see `secrets.pgPassword`.
+  #
+  # `additionalVolumeMounts` and `additionalVolumes` can be used to mount
+  # additional files (e.g. certificates) into the background-worker
+  # container. This does not work for password files (parameter `passfile`),
+  # because libpq-connect requires access rights (mask 0600) for them that we
+  # cannot provide for random uids.
+  #
+  # Below is an example configuration we're using for our CI tests.
+  postgresql:
+    host: postgresql # DNS name without protocol
+    port: "5432"
+    user: wire-server
+    dbname: wire-server
+  postgresqlPool:
+    size: 5
+    acquisitionTimeout: 10s
+    agingTimeout: 1d
+    idlenessTimeout: 10m
+
+  # Setting this to `true` will start conversation migration to postgresql.
+  #
+  # NOTE: It is very important that galley be configured with
+  # `settings.postgresMigration.conversation: migration-to-postgresql`
+  # before setting this to `true`.
+  migrateConversations: false

 backendNotificationPusher:
   pushBackoffMinWait: 10000 # in microseconds, so 10ms
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
index 08f52c24f4..6ccd75f6c3 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
@@ -68,14 +68,22 @@ migrateUsersLoop cassClient pgPool logger =
   migrationLoop cassClient pgPool logger "users" migrateAllUsers

 migrationLoop :: ClientState -> Hasql.Pool -> Log.Logger -> ByteString -> ConduitT () Void (Sem EffectStack) () -> IO ()
-migrationLoop cassClient pgPool logger name migration = go
+migrationLoop cassClient pgPool logger name migration = go 0
   where
-    go = do
+    go :: Int -> IO ()
+    go nIter = do
       runMigration >>= \case
-        0 -> Log.info logger $ Log.msg (Log.val "finished migration")
+        0 ->
+          Log.info logger $
+            Log.msg (Log.val "finished migration")
+              . Log.field "attempt" nIter
         n -> do
-          Log.info logger $ Log.msg (Log.val "finished migration with errors") . Log.field "migration" name . Log.field "errors" n
-          go
+          Log.info logger $
+            Log.msg (Log.val "finished migration with errors")
+              . Log.field "migration" name
+              . Log.field "errors" n
+              . 
Log.field "attempt" nIter
+          go (nIter + 1)

     runMigration :: IO Int
     runMigration =
diff --git a/services/background-worker/background-worker.cabal b/services/background-worker/background-worker.cabal
index 25079919e4..06726f4eb1 100644
--- a/services/background-worker/background-worker.cabal
+++ b/services/background-worker/background-worker.cabal
@@ -19,6 +19,7 @@ library
     Wire.BackgroundWorker.Options
     Wire.BackgroundWorker.Util
     Wire.DeadUserNotificationWatcher
+    Wire.MigrateConversations

  hs-source-dirs:   src
  default-language: GHC2021
@@ -37,6 +38,7 @@ library
    , containers
    , exceptions
    , extended
+   , hasql-pool
    , HsOpenSSL
    , http-client
    , http2-manager
@@ -56,6 +58,7 @@ library
    , wai-utilities
    , wire-api
    , wire-api-federation
+   , wire-subsystems

  default-extensions:
    AllowAmbiguousTypes
diff --git a/services/background-worker/background-worker.integration.yaml b/services/background-worker/background-worker.integration.yaml
index 60dc23a926..c850119fa9 100644
--- a/services/background-worker/background-worker.integration.yaml
+++ b/services/background-worker/background-worker.integration.yaml
@@ -14,6 +14,25 @@ cassandra:
     port: 9042
   keyspace: gundeck_test

+cassandraGalley:
+  endpoint:
+    host: 127.0.0.1
+    port: 9042
+  keyspace: galley_test
+
+postgresql:
+  host: 127.0.0.1
+  port: "5432"
+  user: wire-server
+  dbname: backendA
+  password: posty-the-gres
+
+postgresqlPool:
+  size: 5
+  acquisitionTimeout: 10s
+  agingTimeout: 1d
+  idlenessTimeout: 10m
+
 rabbitmq:
   host: 127.0.0.1
   port: 5671
@@ -28,3 +47,5 @@ backendNotificationPusher:
   pushBackoffMinWait: 1000 # 1ms
   pushBackoffMaxWait: 1000000 # 1s
   remotesRefreshInterval: 10000 # 10ms
+
+migrateConversations: false
diff --git a/services/background-worker/src/Wire/BackgroundWorker.hs b/services/background-worker/src/Wire/BackgroundWorker.hs
index e6110fb438..9487e07724 100644
--- a/services/background-worker/src/Wire/BackgroundWorker.hs
+++ b/services/background-worker/src/Wire/BackgroundWorker.hs
@@ -9,13 +9,14 @@ import Network.AMQP.Extended (demoteOpts)
 import Network.Wai.Utilities.Server
 import Servant
 import Servant.Server.Generic
-import UnliftIO (concurrently_)
+import UnliftIO (Concurrently (..), runConcurrently)
 import Util.Options
 import Wire.BackendNotificationPusher qualified as BackendNotificationPusher
 import Wire.BackgroundWorker.Env
 import Wire.BackgroundWorker.Health qualified as Health
 import Wire.BackgroundWorker.Options
 import Wire.DeadUserNotificationWatcher qualified as DeadUserNotificationWatcher
+import Wire.MigrateConversations qualified as MigrateConversations

 run :: Opts -> IO ()
 run opts = do
@@ -29,11 +30,21 @@ run opts = do
     runAppT env $
       withNamedLogger "dead-user-notification-watcher" $
         DeadUserNotificationWatcher.startWorker amqpEP

-  let -- cleanup will run in a new thread when the signal is caught, so we need to use IORefs and
-      -- specific exception types to message threads to clean up
-      cleanup = do
-        concurrently_ cleanupDeadUserNotifWatcher cleanupBackendNotifPusher
-  let server = defaultServer (T.unpack $ opts.backgroundWorker.host) opts.backgroundWorker.port env.logger
+
+  cleanupConvMigration <-
+    if opts.migrateConversations
+      then
+        runAppT env $
+          withNamedLogger "migrate-conversations" $
+            MigrateConversations.startWorker
+      else pure $ pure ()
+  let cleanup =
+        void . runConcurrently $
+          (,,)
+            <$> Concurrently cleanupDeadUserNotifWatcher
+            <*> Concurrently cleanupBackendNotifPusher
+            <*> Concurrently cleanupConvMigration
+  let server = defaultServer (T.unpack opts.backgroundWorker.host) opts.backgroundWorker.port env.logger
   let settings = newSettings server
   -- Additional cleanup when shutting down via signals.
   runSettingsWithCleanup cleanup settings (servantApp env) Nothing
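The `(,,) <$> ... <*> ...` shape runs the three cleanup actions in parallel via the `Concurrently` applicative from unliftio, replacing the previous two-way `concurrently_`. A minimal standalone illustration of the same pattern (toy actions, not the real worker cleanups):

    import Control.Monad (void)
    import UnliftIO (Concurrently (..), runConcurrently)

    main :: IO ()
    main =
      -- All three actions start concurrently; 'void' drops the result tuple.
      void . runConcurrently $
        (,,)
          <$> Concurrently (putStrLn "stop watcher")
          <*> Concurrently (putStrLn "stop pusher")
          <*> Concurrently (putStrLn "stop migration")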
runConcurrently $ + (,,) + <$> Concurrently cleanupDeadUserNotifWatcher + <*> Concurrently cleanupBackendNotifPusher + <*> Concurrently cleanupConvMigration + let server = defaultServer (T.unpack opts.backgroundWorker.host) opts.backgroundWorker.port env.logger let settings = newSettings server -- Additional cleanup when shutting down via signals. runSettingsWithCleanup cleanup settings (servantApp env) Nothing diff --git a/services/background-worker/src/Wire/BackgroundWorker/Env.hs b/services/background-worker/src/Wire/BackgroundWorker/Env.hs index 93711e9d9b..a928ceef3f 100644 --- a/services/background-worker/src/Wire/BackgroundWorker/Env.hs +++ b/services/background-worker/src/Wire/BackgroundWorker/Env.hs @@ -10,6 +10,8 @@ import Control.Monad.Catch import Control.Monad.Trans.Control import Data.Map.Strict qualified as Map import HTTP2.Client.Manager +import Hasql.Pool qualified as Hasql +import Hasql.Pool.Extended import Imports import Network.AMQP.Extended import Network.HTTP.Client @@ -49,7 +51,9 @@ data Env = Env backendNotificationsConfig :: BackendNotificationsConfig, workerRunningGauge :: Vector Text Gauge, statuses :: IORef (Map Worker IsWorking), - cassandra :: ClientState + cassandra :: ClientState, + cassandraGalley :: ClientState, + hasqlPool :: Hasql.Pool } data BackendNotificationMetrics = BackendNotificationMetrics @@ -72,9 +76,11 @@ mkWorkerRunningGauge = mkEnv :: Opts -> IO Env mkEnv opts = do logger <- Log.mkLogger opts.logLevel Nothing opts.logFormat - cassandra <- defInitCassandra opts.cassandra logger + cassandra <- defInitCassandra opts.cassandra =<< setLoggerName "cassandra-gundeck" logger + cassandraGalley <- defInitCassandra opts.cassandra =<< setLoggerName "cassandra-galley" logger http2Manager <- initHttp2Manager httpManager <- newManager defaultManagerSettings + hasqlPool <- initPostgresPool opts.postgresqlPool opts.postgresql opts.postgresqlPassword let federatorInternal = opts.federatorInternal defederationTimeout = maybe @@ -153,5 +159,9 @@ updateWorkingStatus isWorking worker = do withNamedLogger :: (MonadIO m) => Text -> AppT m a -> AppT m a withNamedLogger name action = do env <- ask - namedLogger <- lift $ Log.new $ Log.setName (Just name) $ Log.settings env.logger + namedLogger <- setLoggerName name env.logger lift $ runAppT (env {logger = namedLogger}) action + +setLoggerName :: (MonadIO m) => Text -> Log.Logger -> m Log.Logger +setLoggerName name logger = + Log.new $ Log.setName (Just name) $ Log.settings logger diff --git a/services/background-worker/src/Wire/BackgroundWorker/Options.hs b/services/background-worker/src/Wire/BackgroundWorker/Options.hs index f9055d89e0..b2176bd80e 100644 --- a/services/background-worker/src/Wire/BackgroundWorker/Options.hs +++ b/services/background-worker/src/Wire/BackgroundWorker/Options.hs @@ -1,6 +1,7 @@ module Wire.BackgroundWorker.Options where import Data.Aeson +import Hasql.Pool.Extended import Imports import Network.AMQP.Extended import System.Logger.Extended @@ -15,7 +16,14 @@ data Opts = Opts -- | Seconds, Nothing for no timeout defederationTimeout :: Maybe Int, backendNotificationPusher :: BackendNotificationsConfig, - cassandra :: CassandraOpts + cassandra :: CassandraOpts, + cassandraGalley :: CassandraOpts, + -- | Postgresql settings, the key values must be in libpq format. 
+ -- https://www.postgresql.org/docs/17/libpq-connect.html#LIBPQ-PARAMKEYWORDS + postgresql :: !(Map Text Text), + postgresqlPassword :: !(Maybe FilePathSecrets), + postgresqlPool :: !PoolConfig, + migrateConversations :: Bool } deriving (Show, Generic) diff --git a/services/background-worker/src/Wire/MigrateConversations.hs b/services/background-worker/src/Wire/MigrateConversations.hs new file mode 100644 index 0000000000..a4d8a6d507 --- /dev/null +++ b/services/background-worker/src/Wire/MigrateConversations.hs @@ -0,0 +1,18 @@ +module Wire.MigrateConversations where + +import Imports +import UnliftIO +import Wire.BackgroundWorker.Env +import Wire.BackgroundWorker.Util +import Wire.ConversationStore.Migration + +startWorker :: AppT IO CleanupAction +startWorker = do + cassClient <- asks (.cassandraGalley) + pgPool <- asks (.hasqlPool) + logger <- asks (.logger) + convLoop <- async . lift $ migrateConvsLoop cassClient pgPool logger + userLoop <- async . lift $ migrateUsersLoop cassClient pgPool logger + pure $ do + cancel convLoop + cancel userLoop From c06d3973be0c23536c914f44336f59c4648efde1 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 21 Oct 2025 11:21:06 +0200 Subject: [PATCH 14/49] integration: Set cassandra keyspace and pg db names for dyn background worker --- integration/test/Testlib/ModService.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/integration/test/Testlib/ModService.hs b/integration/test/Testlib/ModService.hs index 9808d9b04e..c7d11bdb0f 100644 --- a/integration/test/Testlib/ModService.hs +++ b/integration/test/Testlib/ModService.hs @@ -218,13 +218,17 @@ startDynamicBackend resource beOverrides = do { galleyCfg = setField "cassandra.keyspace" resource.berGalleyKeyspace, brigCfg = setField "cassandra.keyspace" resource.berBrigKeyspace, sparCfg = setField "cassandra.keyspace" resource.berSparKeyspace, - gundeckCfg = setField "cassandra.keyspace" resource.berGundeckKeyspace + gundeckCfg = setField "cassandra.keyspace" resource.berGundeckKeyspace, + backgroundWorkerCfg = + setField "cassandra.keyspace" resource.berGundeckKeyspace + >=> setField "cassandraGalley.keyspace" resource.berGalleyKeyspace } setPgDb :: ServiceOverrides setPgDb = def { brigCfg = setField "postgresql.dbname" resource.berPostgresqlDBName, - galleyCfg = setField "postgresql.dbname" resource.berPostgresqlDBName + galleyCfg = setField "postgresql.dbname" resource.berPostgresqlDBName, + backgroundWorkerCfg = setField "postgresql.dbname" resource.berPostgresqlDBName } setEsIndex :: ServiceOverrides From cf76768857ed1602dc011fe4657e6eaff2067ccc Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 22 Oct 2025 09:33:13 +0200 Subject: [PATCH 15/49] background-worker: Fix tests --- .../test/Test/Wire/BackendNotificationPusherSpec.hs | 6 +++++- services/background-worker/test/Test/Wire/Util.hs | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs b/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs index 9e88cbe9f0..4ef829d447 100644 --- a/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs +++ b/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs @@ -323,7 +323,9 @@ spec = do logger <- Logger.new Logger.defSettings httpManager <- newManager defaultManagerSettings let cassandra = undefined - let federatorInternal = Endpoint "localhost" 8097 + cassandraGalley = undefined + hasqlPool = undefined + federatorInternal = 
Endpoint "localhost" 8097 http2Manager = undefined statuses = undefined rabbitmqAdminClient = Just $ mockRabbitMqAdminClient mockAdmin @@ -341,6 +343,8 @@ spec = do mockAdmin <- newMockRabbitMqAdmin True ["backend-notifications.foo.example"] logger <- Logger.new Logger.defSettings let cassandra = undefined + cassandraGalley = undefined + hasqlPool = undefined httpManager <- newManager defaultManagerSettings let federatorInternal = Endpoint "localhost" 8097 http2Manager = undefined diff --git a/services/background-worker/test/Test/Wire/Util.hs b/services/background-worker/test/Test/Wire/Util.hs index cb4eeef5f9..f07aa6e06f 100644 --- a/services/background-worker/test/Test/Wire/Util.hs +++ b/services/background-worker/test/Test/Wire/Util.hs @@ -15,6 +15,8 @@ testEnv = do http2Manager <- initHttp2Manager logger <- Logger.new Logger.defSettings let cassandra = undefined + cassandraGalley = undefined + hasqlPool = undefined statuses <- newIORef mempty backendNotificationMetrics <- mkBackendNotificationMetrics workerRunningGauge <- mkWorkerRunningGauge From 524b75df6a63729dbc254e2a2dd6908db4d0f004 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 22 Oct 2025 12:25:20 +0200 Subject: [PATCH 16/49] ConversationStore.Cassandra: Fix queries for selecting remote convs of a user --- .../src/Wire/ConversationStore/Cassandra/Queries.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs index c75bfb4d06..945babd7b0 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra/Queries.hs @@ -243,10 +243,10 @@ updateMemberConvRoleName = {- `IF EXISTS`, but that requires benchmarking -} "up -- local conversation with remote members selectUserRemoteConvs :: PrepQuery R (Identity UserId) (Domain, ConvId) -selectUserRemoteConvs = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? order by (conv_remote_domain, conv_remote_id)" +selectUserRemoteConvs = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? order by conv_remote_domain, conv_remote_id" selectUserRemoteConvsFrom :: PrepQuery R (UserId, Domain, ConvId) (Domain, ConvId) -selectUserRemoteConvsFrom = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? and (conv_remote_domain, conv_remote_id) > (?, ?) order by (conv_remote_domain, conv_remote_id)" +selectUserRemoteConvsFrom = "select conv_remote_domain, conv_remote_id from user_remote_conv where user = ? and (conv_remote_domain, conv_remote_id) > (?, ?) 
order by conv_remote_domain, conv_remote_id" insertRemoteMember :: PrepQuery W (ConvId, Domain, UserId, RoleName) () insertRemoteMember = "insert into member_remote_user (conv, user_remote_domain, user_remote_id, conversation_role) values (?, ?, ?, ?)" From a50ae6d562d967c020272559132bfb676f676aef Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 22 Oct 2025 12:25:42 +0200 Subject: [PATCH 17/49] integration: Add basic test to test conversation migration --- integration/test/MLS/Util.hs | 8 +- integration/test/SetupHelpers.hs | 14 ++ integration/test/Test/Conversation.hs | 178 ++++++++++++++++++++++++++ 3 files changed, 198 insertions(+), 2 deletions(-) diff --git a/integration/test/MLS/Util.hs b/integration/test/MLS/Util.hs index 5b2b327715..a3e6d51557 100644 --- a/integration/test/MLS/Util.hs +++ b/integration/test/MLS/Util.hs @@ -205,8 +205,12 @@ generateKeyPackage cid suite = do -- | Create conversation and corresponding group. createNewGroup :: (HasCallStack) => Ciphersuite -> ClientIdentity -> App ConvId -createNewGroup cs cid = do - conv <- postConversation cid defMLS >>= getJSON 201 +createNewGroup cs cid = createNewGroupWith cs cid defMLS + +-- | Create conversation and corresponding group. +createNewGroupWith :: (HasCallStack) => Ciphersuite -> ClientIdentity -> CreateConv -> App ConvId +createNewGroupWith cs cid cc = do + conv <- postConversation cid cc {protocol = "mls"} >>= getJSON 201 convId <- objConvId conv createGroup cs cid convId pure convId diff --git a/integration/test/SetupHelpers.hs b/integration/test/SetupHelpers.hs index dc4b8eb604..6d20a0766e 100644 --- a/integration/test/SetupHelpers.hs +++ b/integration/test/SetupHelpers.hs @@ -155,6 +155,20 @@ getAllConvs u = do resp.json result %. "found" & asList +getAllConvIds :: (HasCallStack, MakesValue u) => u -> Int -> App [Value] +getAllConvIds u pageSize = go [] Nothing + where + go acc state0 = do + page <- bindResponse (listConversationIds u def {size = Just pageSize, pagingState = state0}) $ \resp -> do + resp.status `shouldMatchInt` 200 + resp.json + ids <- page %. "qualified_conversations" & asList + state <- page %. "paging_state" >>= asOptional >>= traverse asString + hasMore <- page %. "has_more" & asBool + if hasMore + then go (acc <> ids) state + else pure (acc <> ids) + -- | Setup a team user, another user, connect the two, create a proteus -- conversation, upgrade to mixed. Return the two users and the conversation. simpleMixedConversationSetup :: diff --git a/integration/test/Test/Conversation.hs b/integration/test/Test/Conversation.hs index c699bbc726..733c9fd33e 100644 --- a/integration/test/Test/Conversation.hs +++ b/integration/test/Test/Conversation.hs @@ -29,8 +29,12 @@ import Control.Concurrent (threadDelay) import Control.Monad.Codensity import Control.Monad.Reader import qualified Data.Aeson as Aeson +import Data.IntMap (IntMap) +import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet import qualified Data.Text as T import GHC.Stack +import MLS.Util import Notifications import SetupHelpers hiding (deleteUser) import Testlib.One2One (generateRemoteAndConvIdWithDomain) @@ -1028,3 +1032,177 @@ testGetSelfMember = do resp.json %. "service" `shouldMatch` Null resp.json %. "status" `shouldMatchInt` 0 resp.json %. "status_ref" `shouldMatch` "0.0" + +-- | The migration has these phases. +-- 1. Write to cassandra (before any migration activity) +-- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra) +-- 3. 
Background worker starts migration
+-- 4. Background worker finishes migration, galley is still configured to think migration is ongoing
+-- 5. Background worker is configured to not do anything, galley is configured to only use PG
+--
+-- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on.
+--
+-- The tests are from the perspective of mel, a user on the dynamic backend,
+-- called backendM (migrating backend). There are also users called mark and mia
+-- on this backend.
+--
+-- TODO:
+-- Also create convs and send messages in all phases
+testMigrationToPostgres :: App ()
+testMigrationToPostgres = do
+  resourcePool <- asks (.resourcePool)
+  (alice, aliceTid, _) <- createTeam OwnDomain 1
+  (bob, bobTid, _) <- createTeam OtherDomain 1
+  [aliceC, bobC] <- traverse (createMLSClient def) [alice, bob]
+
+  let phase1Overrides =
+        def
+          { galleyCfg = setField "postgresMigration.conversation" "cassandra",
+            backgroundWorkerCfg = setField "migrateConversations" False
+          }
+      phase2Overrides =
+        def
+          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+            backgroundWorkerCfg = setField "migrateConversations" False
+          }
+      phase3Overrides =
+        def
+          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+            backgroundWorkerCfg = setField "migrateConversations" True
+          }
+      phase4Overrides =
+        def
+          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+            backgroundWorkerCfg = setField "migrateConversations" False
+          }
+      phase5Overrides =
+        def
+          { galleyCfg = setField "postgresMigration.conversation" "postgresql",
+            backgroundWorkerCfg = setField "migrateConversations" False
+          }
+      phaseOverrides =
+        IntMap.fromList
+          [ (1, phase1Overrides),
+            (2, phase2Overrides),
+            (3, phase3Overrides),
+            (4, phase4Overrides),
+            (5, phase5Overrides)
+          ]
+  runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do
+    let domainM = migratingBackend.berDomain
+    (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do
+      [mel, mark] <- createUsers [domainM, domainM]
+      (mia, miaTid, _) <- createTeam domainM 1
+      [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia]
+      connectUsers [alice, bob, mel, mark, mia]
+      otherMelConvs <- getAllConvIds mel 100
+
+      domainAConvs <- createTestConvs aliceC aliceTid melC markC []
+      domainBConvs <- createTestConvs bobC bobTid melC markC []
+      domainMConvs <- createTestConvs miaC miaTid melC markC []
+      pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs)
+
+    addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)]
+      $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs)
+      $ do
+        let runPhase :: (HasCallStack) => Int -> App ()
+            runPhase phase = do
+              putStrLn $ "----------> Start phase: " <> show phase
+              runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do
+                runPhaseOperations phase aliceC aliceTid domainAConvs melC markC
+                runPhaseOperations phase bobC bobTid domainBConvs melC markC
+                runPhaseOperations phase miaC miaTid domainMConvs melC markC
+                actualConvs <- getAllConvIds mel n
+                let expectedConvsFrom dom =
+                      dom.unmodifiedConvs
+                        <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5])))
+                        <> concat (IntMap.elems dom.kickMarkConvs)
+                        <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5])))
+                    expectedConvs =
+                      expectedConvsFrom domainAConvs
+                        <> expectedConvsFrom domainBConvs
+                        <> expectedConvsFrom domainMConvs
+
+                actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs)
+                -- TODO: Explicitly wait for migration to be over
+                when (phase == 3) $ do
+                  liftIO $ threadDelay 10_000_000
+        runPhase 1
+        runPhase 2
+        runPhase 3
+        runPhase 4
+        runPhase 5
+  where
+    n = 1
+    -- Creates n convs of these types:
+    -- 1. Convs that will exist unmodified during the test
+    -- 2. Convs that will kick mel in each phase
+    -- 3. Convs that will kick mark in each phase
+    -- 4. Convs that will be deleted in each phase
+    createTestConvs :: (HasCallStack) => ClientIdentity -> String -> ClientIdentity -> ClientIdentity -> [ClientIdentity] -> App TestConvList
+    createTestConvs creatorC tid melC markC othersC = do
+      unmodifiedConvs <- replicateM n $ do
+        createTestConv creatorC tid (melC : markC : othersC)
+
+      kickMelConvs <- forPhase $ createTestConv creatorC tid (melC : othersC)
+      kickMarkConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC)
+      delConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC)
+      pure $ TestConvList {..}
+
+    createTestConv :: (HasCallStack) => ClientIdentity -> String -> [ClientIdentity] -> App ConvId
+    createTestConv creatorC tid membersC = do
+      conv <- createNewGroupWith def creatorC defMLS {team = Just tid}
+      traverse_ (uploadNewKeyPackage def) membersC
+      void $ createAddCommit creatorC conv ((.qualifiedUserId) <$> membersC) >>= sendAndConsumeCommitBundle
+      pure conv
+
+    forPhase :: App a -> App (IntMap [a])
+    forPhase action =
+      fmap IntMap.fromList . for [1 .. 
5] $ \phase -> do + convs <- replicateM n $ action + pure (phase, convs) + + runPhaseOperations :: (HasCallStack) => Int -> ClientIdentity -> String -> TestConvList -> ClientIdentity -> ClientIdentity -> App () + runPhaseOperations phase convAdmin tid TestConvList {..} melC markC = do + for_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do + mp <- createRemoveCommit convAdmin convId [melC] + void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 + + for_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do + mp <- createRemoveCommit convAdmin convId [markC] + void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 + + for_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do + deleteTeamConversation tid convId convAdmin >>= assertSuccess + getConversation convAdmin convId `bindResponse` \resp -> + resp.status `shouldMatchInt` 404 + +-- Test Helpers + +data TestConvList = TestConvList + { unmodifiedConvs :: [ConvId], + kickMelConvs :: IntMap [ConvId], + kickMarkConvs :: IntMap [ConvId], + delConvs :: IntMap [ConvId] + } + +instance ToJSON TestConvList where + toJSON convList = do + object + [ fromString "unmodifiedConvs" .= (mkId <$> convList.unmodifiedConvs), + fromString "kickMelConvs" .= (mkId <$$> convList.kickMelConvs), + fromString "kickMarkConvs" .= (mkId <$$> convList.kickMelConvs), + fromString "delConvs" .= (mkId <$$> convList.delConvs) + ] + where + mkId :: ConvId -> String + mkId cid = cid.id_ <> "@" <> cid.domain + +instance Semigroup TestConvList where + l1 <> l2 = + TestConvList + { unmodifiedConvs = l1.unmodifiedConvs <> l2.unmodifiedConvs, + kickMelConvs = IntMap.unionWith (<>) l1.kickMelConvs l2.kickMelConvs, + kickMarkConvs = IntMap.unionWith (<>) l1.kickMarkConvs l2.kickMarkConvs, + delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs + } From fca56af9856d062bce12a5e35fe619ec55c1401f Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 23 Oct 2025 15:59:43 +0200 Subject: [PATCH 18/49] background-worker: Fix copy-pasta error with cassandra galley --- services/background-worker/src/Wire/BackgroundWorker/Env.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/background-worker/src/Wire/BackgroundWorker/Env.hs b/services/background-worker/src/Wire/BackgroundWorker/Env.hs index a928ceef3f..af9f2aaf5f 100644 --- a/services/background-worker/src/Wire/BackgroundWorker/Env.hs +++ b/services/background-worker/src/Wire/BackgroundWorker/Env.hs @@ -77,7 +77,7 @@ mkEnv :: Opts -> IO Env mkEnv opts = do logger <- Log.mkLogger opts.logLevel Nothing opts.logFormat cassandra <- defInitCassandra opts.cassandra =<< setLoggerName "cassandra-gundeck" logger - cassandraGalley <- defInitCassandra opts.cassandra =<< setLoggerName "cassandra-galley" logger + cassandraGalley <- defInitCassandra opts.cassandraGalley =<< setLoggerName "cassandra-galley" logger http2Manager <- initHttp2Manager httpManager <- newManager defaultManagerSettings hasqlPool <- initPostgresPool opts.postgresqlPool opts.postgresql opts.postgresqlPassword From 7089959e268c1004c19f92f53e77c5b794822c06 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 23 Oct 2025 16:00:22 +0200 Subject: [PATCH 19/49] background-worker: Add some observability to pg migration --- .../src/Wire/ConversationStore/Migration.hs | 55 +++++++++++-------- services/background-worker/default.nix | 4 ++ .../src/Wire/MigrateConversations.hs | 16 +++++- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git 
a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index 6ccd75f6c3..22891084d0 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -31,6 +31,7 @@ import Polysemy.Input import Polysemy.State import Polysemy.Time import Polysemy.TinyLog +import Prometheus qualified import System.Logger qualified as Log import Wire.API.Conversation hiding (Member) import Wire.API.Conversation.Protocol @@ -59,16 +60,18 @@ import Wire.Util type EffectStack = [State Int, Input ClientState, Input Hasql.Pool, Async, Race, TinyLog, Embed IO, Final IO] -migrateConvsLoop :: ClientState -> Hasql.Pool -> Log.Logger -> IO () -migrateConvsLoop cassClient pgPool logger = - migrationLoop cassClient pgPool logger "conversations" migrateAllConversations +migrateConvsLoop :: ClientState -> Hasql.Pool -> Log.Logger -> Prometheus.Counter -> Prometheus.Counter -> IO () +migrateConvsLoop cassClient pgPool logger migCounter migFinished = + migrationLoop cassClient pgPool logger "conversations" migFinished $ migrateAllConversations migCounter -migrateUsersLoop :: ClientState -> Hasql.Pool -> Log.Logger -> IO () -migrateUsersLoop cassClient pgPool logger = - migrationLoop cassClient pgPool logger "users" migrateAllUsers +migrateUsersLoop :: ClientState -> Hasql.Pool -> Log.Logger -> Prometheus.Counter -> Prometheus.Counter -> IO () +migrateUsersLoop cassClient pgPool logger migCounter migFinished = + migrationLoop cassClient pgPool logger "users" migFinished $ migrateAllUsers migCounter -migrationLoop :: ClientState -> Hasql.Pool -> Log.Logger -> ByteString -> ConduitT () Void (Sem EffectStack) () -> IO () -migrationLoop cassClient pgPool logger name migration = go 0 +migrationLoop :: ClientState -> Hasql.Pool -> Log.Logger -> ByteString -> Prometheus.Counter -> ConduitT () Void (Sem EffectStack) () -> IO () +migrationLoop cassClient pgPool logger name migFinished migration = do + go 0 + Prometheus.incCounter migFinished where go :: Int -> IO () go nIter = do @@ -116,12 +119,13 @@ migrateAllConversations :: Member Race r, Member (State Int) r ) => + Prometheus.Counter -> ConduitM () Void (Sem r) () -migrateAllConversations = +migrateAllConversations migCounter = do + lift $ info $ Log.msg (Log.val "migrateAllConversations") withCount (paginateSem select (paramsP LocalQuorum () pageSize) x5) .| logRetrievedPage - .| C.mapM_ (mapM_ (handleErrors migrateConversation "conv")) - .| C.sinkNull + .| C.mapM_ (mapM_ (handleErrors (migrateConversation migCounter) "conv")) where select :: PrepQuery R () (Identity ConvId) select = "select conv from conversation" @@ -135,13 +139,13 @@ migrateAllUsers :: Member Race r, Member (State Int) r ) => + Prometheus.Counter -> ConduitM () Void (Sem r) () -migrateAllUsers = - withCount - (paginateSem select (paramsP LocalQuorum () pageSize) x5) +migrateAllUsers migCounter = do + lift $ info $ Log.msg (Log.val "migrateAllUsers") + withCount (paginateSem select (paramsP LocalQuorum () pageSize) x5) .| logRetrievedPage - .| C.mapM_ (mapM_ (handleErrors migrateUser "user")) - .| C.sinkNull + .| C.mapM_ (mapM_ (handleErrors (migrateUser migCounter) "user")) where select :: PrepQuery R () (Identity UserId) select = "select distinct user from user_remote_conv" @@ -185,15 +189,17 @@ migrateConversation :: Member (Error MigrationLockError) r, Member Race r ) => + Prometheus.Counter -> ConvId -> Sem r () -migrateConversation cid = do 
+migrateConversation migCounter cid = do void . withMigrationLocks LockExclusive (Seconds 10) [Left cid] $ do mConvData <- withCassandra $ getAllConvData cid for_ mConvData $ \convData -> do saveConvToPostgres convData withCassandra $ deleteConv convData markDeletionComplete DeleteConv cid + liftIO $ Prometheus.incCounter migCounter deleteConvFromCassandra :: (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => AllConvData -> Sem r () deleteConvFromCassandra allConvData = withCassandra $ do @@ -250,8 +256,8 @@ saveConvToPostgres allConvData = do [resultlessStatement|INSERT INTO conversation (id, type, creator, access, access_roles_v2, name, team, message_timer, receipt_mode, protocol, - group_id, epoch, epoch_timestamp, cipher_suite, group_conv_type, - channel_add_permission, cells_state, parent_conv) + group_id, epoch, epoch_timestamp, cipher_suite, public_group_state, + group_conv_type, channel_add_permission, cells_state, parent_conv) VALUES ($1 :: uuid, $2 :: integer, $3 :: uuid?, $4 :: integer[], $5 :: integer[], $6 :: text?, $7 :: uuid?, $8 :: bigint?, $9 :: integer?, $10 :: integer, @@ -386,13 +392,14 @@ saveConvToPostgres allConvData = do -- * Users -migrateUser :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => UserId -> Sem r () -migrateUser uid = do +migrateUser :: (PGConstraints r, Member (Input ClientState) r, Member TinyLog r, Member Async r, Member (Error MigrationLockError) r, Member Race r) => Prometheus.Counter -> UserId -> Sem r () +migrateUser migCounter uid = do withMigrationLocks LockExclusive (Seconds 10) [Right uid] $ do statusses <- getRemoteMemberStatusFromCassandra uid saveRemoteMemberStatusToPostgres uid statusses deleteRemoteMemberStatusesFromCassandra uid markDeletionComplete DeleteUser uid + liftIO $ Prometheus.incCounter migCounter getRemoteMemberStatusFromCassandra :: forall r. (Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => UserId -> Sem r (Map (Remote ConvId) MemberStatus) getRemoteMemberStatusFromCassandra uid = withCassandra $ do @@ -450,10 +457,12 @@ unzip9 ((y1, y2, y3, y4, y5, y6, y7, y8, y9) : ys) = let (l1, l2, l3, l4, l5, l6, l7, l8, l9) = unzip9 ys in (y1 : l1, y2 : l2, y3 : l3, y4 : l4, y5 : l5, y6 : l6, y7 : l7, y8 : l8, y9 : l9) -paginateSem :: forall a b q r. (Tuple a, Tuple b, RunQ q, Member (Input ClientState) r, Member (Embed IO) r) => q R a b -> QueryParams a -> RetrySettings -> ConduitT () [b] (Sem r) () -paginateSem q p r = go =<< lift getFirstPage +paginateSem :: forall a b q r. 
(Tuple a, Tuple b, RunQ q, Member (Input ClientState) r, Member TinyLog r, Member (Embed IO) r) => q R a b -> QueryParams a -> RetrySettings -> ConduitT () [b] (Sem r) ()
+paginateSem q p r = do
+  go =<< lift getFirstPage
   where
     go page = do
+      lift $ info $ Log.msg (Log.val "Got a page")
       unless (null (result page)) $
         yield (result page)
       when (hasMore page) $
diff --git a/services/background-worker/default.nix b/services/background-worker/default.nix
index 7ef4b6ab45..654566ad6e 100644
--- a/services/background-worker/default.nix
+++ b/services/background-worker/default.nix
@@ -15,6 +15,7 @@
 , extended
 , federator
 , gitignoreSource
+, hasql-pool
 , HsOpenSSL
 , hspec
 , http-client
@@ -42,6 +43,7 @@
 , wai-utilities
 , wire-api
 , wire-api-federation
+, wire-subsystems
 }:
 mkDerivation {
   pname = "background-worker";
@@ -59,6 +61,7 @@ mkDerivation {
     containers
     exceptions
     extended
+    hasql-pool
     HsOpenSSL
     http-client
     http2-manager
@@ -78,6 +81,7 @@ mkDerivation {
     wai-utilities
     wire-api
     wire-api-federation
+    wire-subsystems
   ];
   executableHaskellDepends = [ HsOpenSSL imports types-common ];
   testHaskellDepends = [
diff --git a/services/background-worker/src/Wire/MigrateConversations.hs b/services/background-worker/src/Wire/MigrateConversations.hs
index a4d8a6d507..72cb716298 100644
--- a/services/background-worker/src/Wire/MigrateConversations.hs
+++ b/services/background-worker/src/Wire/MigrateConversations.hs
@@ -1,6 +1,8 @@
 module Wire.MigrateConversations where
 
 import Imports
+import Prometheus
+import System.Logger qualified as Log
 import UnliftIO
 import Wire.BackgroundWorker.Env
 import Wire.BackgroundWorker.Util
@@ -11,8 +13,18 @@ startWorker = do
   cassClient <- asks (.cassandraGalley)
   pgPool <- asks (.hasqlPool)
   logger <- asks (.logger)
-  convLoop <- async . lift $ migrateConvsLoop cassClient pgPool logger
-  userLoop <- async . lift $ migrateUsersLoop cassClient pgPool logger
+
+  Log.info logger $ Log.msg (Log.val "starting conversation migration")
+  convMigCounter <- register $ counter $ Prometheus.Info "wire_local_convs_migrated_to_pg" "Number of local conversations migrated to Postgresql"
+  convMigFinished <- register $ counter $ Prometheus.Info "wire_local_convs_migration_finished" "Whether the conversation migration to Postgresql is finished"
+  userMigCounter <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migrated_to_pg" "Number of users whose remote conversation membership data is migrated to Postgresql"
+  userMigFinished <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migration_finished" "Whether the migration of remote conversation membership data to Postgresql is finished"
+
+  convLoop <- async . lift $ migrateConvsLoop cassClient pgPool logger convMigCounter convMigFinished
+  userLoop <- async . lift $ migrateUsersLoop cassClient pgPool logger userMigCounter userMigFinished
+
+  Log.info logger $ Log.msg (Log.val "started conversation migration")
   pure $ do
+    Log.info logger $ Log.msg (Log.val "cancelling conversation migration")
     cancel convLoop
     cancel userLoop
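The counters registered above are served on background-worker's `/i/metrics` endpoint in the Prometheus text exposition format, one `name value` pair per line, and the next patch polls them from the integration tests. A minimal self-contained sketch of such a check, with an illustrative scrape text (only the metric names are taken from the code above):

  {-# LANGUAGE OverloadedStrings #-}

  import Data.Text (Text)
  import Text.Regex.TDFA ((=~))

  -- Illustrative scrape output; the real text comes from /i/metrics.
  scrape :: Text
  scrape = "wire_local_convs_migrated_to_pg 42.0\nwire_local_convs_migration_finished 1.0\n"

  -- True once the finished counter has been bumped to 1.0.
  migrationFinished :: Bool
  migrationFinished =
    let result :: (Text, Text, Text, [Text])
        result = scrape =~ ("^wire_local_convs_migration_finished ([0-9]+\\.[0-9]+)$" :: Text)
        (_, _, _, matches) = result
     in matches == ["1.0"]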
From 9d7fb56997c974b469c507df54c3709baccea3af Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 23 Oct 2025 16:30:47 +0200
Subject: [PATCH 20/49] integration: Use background worker metrics to check
 migration completion

---
 integration/test/SetupHelpers.hs      |  5 +++++
 integration/test/Test/Conversation.hs | 20 +++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/integration/test/SetupHelpers.hs b/integration/test/SetupHelpers.hs
index 6d20a0766e..cceea99d27 100644
--- a/integration/test/SetupHelpers.hs
+++ b/integration/test/SetupHelpers.hs
@@ -717,3 +717,8 @@ registerInvitedUser domain tid email = do
     >>= asString
     >>= registerUser domain email
     >>= assertSuccess
+
+getMetrics :: (HasCallStack, MakesValue domain) => domain -> Service -> App Response
+getMetrics domain service = do
+  req <- rawBaseRequest domain service Unversioned "/i/metrics"
+  submit "GET" req
diff --git a/integration/test/Test/Conversation.hs b/integration/test/Test/Conversation.hs
index 733c9fd33e..2996c501ed 100644
--- a/integration/test/Test/Conversation.hs
+++ b/integration/test/Test/Conversation.hs
@@ -32,7 +32,10 @@ import qualified Data.Aeson as Aeson
 import Data.IntMap (IntMap)
 import qualified Data.IntMap as IntMap
 import qualified Data.IntSet as IntSet
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text as Text
+import qualified Data.Text.Encoding as Text
 import GHC.Stack
 import MLS.Util
 import Notifications
@@ -41,6 +44,7 @@ import Testlib.One2One (generateRemoteAndConvIdWithDomain)
 import Testlib.Prelude
 import Testlib.ResourcePool
 import Testlib.VersionedFed
+import Text.Regex.TDFA ((=~))
 
 testFederatedConversation :: (HasCallStack) => App ()
 testFederatedConversation = do
@@ -1124,9 +1128,8 @@ testMigrationToPostgres = do
                 actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs)
-                -- TODO: Explicitly wait for migration to be over
-                when (phase == 3) $ do
-                  liftIO $ threadDelay 10_000_000
+
+                when (phase == 3) $ waitForMigration domainM
         runPhase 1
         runPhase 2
         runPhase 3
@@ -1177,6 +1180,17 @@ testMigrationToPostgres = do
       getConversation convAdmin convId `bindResponse` \resp ->
         resp.status `shouldMatchInt` 404
+
+  waitForMigration :: (HasCallStack) => String -> App ()
+  waitForMigration domainM = do
+    metrics <-
+      getMetrics domainM BackgroundWorker `bindResponse` \resp -> do
+        resp.status `shouldMatchInt` 200
+        pure $ Text.decodeUtf8 resp.body
+    let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$")
+    let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$")
+    when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do
+      liftIO $ threadDelay 100_000
+      waitForMigration domainM
 -- Test Helpers

 data TestConvList = TestConvList
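The membership assertions in these tests hinge on `IntMap.restrictKeys`: after phase p, mel must still be a member of exactly the kick buckets belonging to phases p + 1 and later. A tiny self-contained illustration of that idiom (the bucket contents are made up):

  import Data.IntMap.Strict (IntMap)
  import qualified Data.IntMap.Strict as IntMap
  import qualified Data.IntSet as IntSet

  -- Conversation buckets keyed by the phase that will empty them.
  buckets :: IntMap [String]
  buckets = IntMap.fromList [(1, ["a"]), (2, ["b"]), (3, ["c"])]

  -- After phase p, only buckets of later phases still apply:
  -- stillMember 1 == ["b", "c"]
  stillMember :: Int -> [String]
  stillMember p = concat (IntMap.elems (IntMap.restrictKeys buckets (IntSet.fromList [p + 1 .. 3])))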
From 28c22e79d0c10b38569832223dcb9c30528519 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 23 Oct 2025 16:57:39 +0200
Subject: [PATCH 21/49] integration: Add another test with more convos, but
 use proteus for speed

---
 integration/test/Test/Conversation.hs | 169 +++++++++++++++++++++++++-
 1 file changed, 167 insertions(+), 2 deletions(-)

diff --git a/integration/test/Test/Conversation.hs b/integration/test/Test/Conversation.hs
index 2996c501ed..990e531b1e 100644
--- a/integration/test/Test/Conversation.hs
+++ b/integration/test/Test/Conversation.hs
@@ -45,6 +45,7 @@ import Testlib.Prelude
 import Testlib.ResourcePool
 import Testlib.VersionedFed
 import Text.Regex.TDFA ((=~))
+import UnliftIO
 
 testFederatedConversation :: (HasCallStack) => App ()
 testFederatedConversation = do
@@ -1052,8 +1053,8 @@ testGetSelfMember = do
 --
 -- TODO:
 -- Also create convs and send messages in all phases
-testMigrationToPostgres :: App ()
-testMigrationToPostgres = do
+testMigrationToPostgresMLS :: App ()
+testMigrationToPostgresMLS = do
   resourcePool <- asks (.resourcePool)
   (alice, aliceTid, _) <- createTeam OwnDomain 1
   (bob, bobTid, _) <- createTeam OtherDomain 1
@@ -1191,6 +1192,170 @@ testMigrationToPostgresMLS = do
     when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do
       liftIO $ threadDelay 100_000
       waitForMigration domainM
+
+-- | The migration has these phases.
+-- 1. Write to cassandra (before any migration activity)
+-- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra)
+-- 3. Background worker starts migration
+-- 4. Background worker finishes migration, galley is still configured to think migration is ongoing
+-- 5. Background worker is configured to not do anything, galley is configured to only use PG
+--
+-- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on.
+--
+-- The tests are from the perspective of mel, a user on the dynamic backend,
+-- called backendM (migrating backend). There are also users called mark and mia
+-- on this backend.
+-- +-- TODO: +-- Also create convs and send messages in all phases +testMigrationToPostgresJustProteus :: App () +testMigrationToPostgresJustProteus = do + resourcePool <- asks (.resourcePool) + (alice, aliceTid, _) <- createTeam OwnDomain 1 + (bob, bobTid, _) <- createTeam OtherDomain 1 + + let phase1Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "cassandra", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase2Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase3Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" True + } + phase4Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase5Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phaseOverrides = + IntMap.fromList + [ (1, phase1Overrides), + (2, phase2Overrides), + (3, phase3Overrides), + (4, phase4Overrides), + (5, phase5Overrides) + ] + runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do + let domainM = migratingBackend.berDomain + (mel, _melC, mark, _markC, mia, _miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do + [mel, mark] <- createUsers [domainM, domainM] + (mia, miaTid, _) <- createTeam domainM 1 + [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia] + connectUsers [alice, bob, mel, mark, mia] + otherMelConvs <- getAllConvIds mel 100 + + domainAConvs <- createTestConvs alice aliceTid mel mark [] + domainBConvs <- createTestConvs bob bobTid mel mark [] + domainMConvs <- createTestConvs mia miaTid mel mark [] + pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) + + addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] + $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) + $ do + let runPhase :: (HasCallStack) => Int -> App () + runPhase phase = do + putStrLn $ "----------> Start phase: " <> show phase + runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do + runPhaseOperations phase alice aliceTid domainAConvs mel mark + runPhaseOperations phase bob bobTid domainBConvs mel mark + runPhaseOperations phase mia miaTid domainMConvs mel mark + actualConvs <- getAllConvIds mel n + let expectedConvsFrom dom = + dom.unmodifiedConvs + <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems dom.kickMarkConvs) + <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) + expectedConvs = + expectedConvsFrom domainAConvs + <> expectedConvsFrom domainBConvs + <> expectedConvsFrom domainMConvs + + actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) + + when (phase == 3) $ waitForMigration domainM + runPhase 1 + runPhase 2 + runPhase 3 + runPhase 4 + runPhase 5 + where + n = 10 + -- Creates n convs of these types: + -- 1. Convs that will exist unmodified during the test + -- 2. 
Convs that will kick mel in each phase + -- 3. Convs that will kick mark in each phase + -- 4. Convs that will be deleted in each phase + createTestConvs :: (HasCallStack) => Value -> String -> Value -> Value -> [Value] -> App TestConvList + createTestConvs creatorC tid mel mark others = do + unmodifiedConvs <- replicateConcurrently n $ do + createTestConv creatorC tid (mel : mark : others) + + kickMelConvs <- forPhase $ createTestConv creatorC tid (mel : others) + kickMarkConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) + delConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) + pure $ TestConvList {..} + + createTestConv :: (HasCallStack) => Value -> String -> [Value] -> App ConvId + createTestConv creator tid members = do + postConversation creator defProteus {team = Just tid, qualifiedUsers = members} + >>= getJSON 201 + >>= objConvId + + forPhase :: App a -> App (IntMap [a]) + forPhase action = + fmap IntMap.fromList . forConcurrently [1 .. 5] $ \phase -> do + convs <- replicateM n $ action + pure (phase, convs) + + retry500Once :: App Response -> App Response + retry500Once action = do + action `bindResponse` \resp -> do + if resp.status == 500 + then action + else pure resp + + runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App () + runPhaseOperations phase convAdmin tid TestConvList {..} mel mark = do + withWebSocket mel $ \melWS -> do + forConcurrently_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do + retry500Once (removeMember convAdmin convId mel) >>= assertSuccess + + void $ awaitNMatches n isConvLeaveNotif melWS + + forConcurrently_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do + retry500Once (removeMember convAdmin convId mark) >>= assertSuccess + + void $ awaitNMatches n isConvLeaveNotif melWS + + forConcurrently_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do + retry500Once (deleteTeamConversation tid convId convAdmin) >>= assertSuccess + + void $ awaitNMatches n isConvDeleteNotif melWS + + waitForMigration :: (HasCallStack) => String -> App () + waitForMigration domainM = do + metrics <- + getMetrics domainM BackgroundWorker `bindResponse` \resp -> do + resp.status `shouldMatchInt` 200 + pure $ Text.decodeUtf8 resp.body + let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do + liftIO $ threadDelay 100_000 + waitForMigration domainM + -- Test Helpers data TestConvList = TestConvList From 0085d5beb5c2ba2e3ef3918df56a26b6488219ad Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 28 Oct 2025 09:38:09 +0100 Subject: [PATCH 22/49] ConversationStore.{Cassandra,Postgres}: Use same ordering of UUIDs Cassandra cares for UUID version in ordering. Postgres doesn't, so the Postgres query needs to be weird. 
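Concretely: Cassandra's `uuid` comparator groups ids by UUID version before comparing the ids themselves, while Postgres compares ids directly, so the merged pages from the two stores are compared on the pair (version, id). A minimal sketch of that ordering, reusing the same `Data.UUID.Util` accessor the change below imports (the input list is whatever ids the stores return):

  import Data.List (sortOn)
  import Data.UUID (UUID)
  import Data.UUID.Util qualified as UUID

  -- Sort ids the way Cassandra's comparator groups them:
  -- by UUID version first, then by the id itself.
  cassandraOrder :: [UUID] -> [UUID]
  cassandraOrder = sortOn (\u -> (UUID.version u, u))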
--- .../src/Wire/ConversationStore/Cassandra.hs | 3 ++- .../src/Wire/ConversationStore/Postgres.hs | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index c7d9228f52..9e817efd74 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -42,6 +42,7 @@ import Data.Qualified import Data.Range import Data.Set qualified as Set import Data.Time +import Data.UUID.Util qualified as UUID import Imports import Network.HTTP.Types.Status (status500) import Network.Wai.Utilities.Error qualified as WaiError @@ -1107,7 +1108,7 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case cassConvIds <- embedClient client $ getLocalConvIds uid start maxIds pgConvIds <- interpretConversationStoreToPostgres $ ConvStore.getLocalConversationIds uid start maxIds - let allResults = List.nubOrd (pgConvIds.resultSetResult <> cassConvIds.resultSetResult) + let allResults = List.nubOrdBy (\id1 id2 -> (UUID.version $ toUUID id1, id1) `compare` (UUID.version $ toUUID id2, id2)) (pgConvIds.resultSetResult <> cassConvIds.resultSetResult) maxIdsInt = (fromIntegral $ fromRange maxIds) pure $ ResultSet diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs index 3570fe73ca..636c3bdd2f 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs @@ -286,7 +286,7 @@ getLocalConversationIdsImpl usr start (fromRange -> maxIds) = do [vectorStatement|SELECT (conv :: uuid) FROM conversation_member WHERE "user" = ($1 :: uuid) - ORDER BY conv + ORDER BY uuid_extract_version(conv), conv LIMIT ($2 :: integer) |] @@ -297,7 +297,7 @@ getLocalConversationIdsImpl usr start (fromRange -> maxIds) = do FROM conversation_member WHERE "user" = ($1 :: uuid) AND conv > ($2 :: uuid) - ORDER BY conv + ORDER BY uuid_extract_version(conv), conv LIMIT ($3 :: integer) |] @@ -313,7 +313,7 @@ getRemoteConversationIdsImpl usr start (fromRange -> maxIds) = do [vectorStatement|SELECT (conv_remote_domain :: text), (conv_remote_id :: uuid) FROM remote_conversation_local_member WHERE "user" = ($1 :: uuid) - ORDER BY (conv_remote_domain, conv_remote_id) + ORDER BY conv_remote_domain, uuid_extract_version(conv_remote_id), conv_remote_id LIMIT ($2 :: integer) |] @@ -323,8 +323,8 @@ getRemoteConversationIdsImpl usr start (fromRange -> maxIds) = do [vectorStatement|SELECT (conv_remote_domain :: text), (conv_remote_id :: uuid) FROM remote_conversation_local_member WHERE "user" = ($1 :: uuid) - AND (conv_remote_domain, conv_remote_id) > ($2 :: text, $3 ::uuid) - ORDER BY (conv_remote_domain, conv_remote_id) + AND (conv_remote_domain, uuid_extract_version(conv_remote_id), conv_remote_id) > ($2 :: text, uuid_extract_version($3 :: uuid), $3 :: uuid) + ORDER BY conv_remote_domain, uuid_extract_version(conv_remote_id), conv_remote_id LIMIT ($4 :: integer) |] From bad16687616e56857fa76f22508b1cbc4e795771 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 28 Oct 2025 09:56:32 +0100 Subject: [PATCH 23/49] integration: Add mel to convs in every phase --- integration/test/Test/Conversation.hs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/integration/test/Test/Conversation.hs b/integration/test/Test/Conversation.hs 
index 990e531b1e..b488e6ba4d 100644 --- a/integration/test/Test/Conversation.hs +++ b/integration/test/Test/Conversation.hs @@ -1109,6 +1109,7 @@ testMigrationToPostgresMLS = do addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) + $ addJSONToFailureContext "otherMelConvs" otherMelConvs $ do let runPhase :: (HasCallStack) => Int -> App () runPhase phase = do @@ -1123,6 +1124,7 @@ testMigrationToPostgresMLS = do <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) <> concat (IntMap.elems dom.kickMarkConvs) <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [(phase + 1) .. 5]))) expectedConvs = expectedConvsFrom domainAConvs <> expectedConvsFrom domainBConvs @@ -1151,6 +1153,7 @@ testMigrationToPostgresMLS = do kickMelConvs <- forPhase $ createTestConv creatorC tid (melC : othersC) kickMarkConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) delConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) + addMelConvs <- forPhase $ createTestConv creatorC tid othersC pure $ TestConvList {..} createTestConv :: (HasCallStack) => ClientIdentity -> String -> [ClientIdentity] -> App ConvId @@ -1181,6 +1184,10 @@ testMigrationToPostgresMLS = do getConversation convAdmin convId `bindResponse` \resp -> resp.status `shouldMatchInt` 404 + for_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do + void $ uploadNewKeyPackage def melC + void $ createAddCommit convAdmin convId [melC.qualifiedUserId] >>= sendAndConsumeCommitBundle + waitForMigration :: (HasCallStack) => String -> App () waitForMigration domainM = do metrics <- @@ -1263,6 +1270,7 @@ testMigrationToPostgresJustProteus = do addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) + $ addJSONToFailureContext "otherMelConvs" otherMelConvs $ do let runPhase :: (HasCallStack) => Int -> App () runPhase phase = do @@ -1277,6 +1285,7 @@ testMigrationToPostgresJustProteus = do <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) <> concat (IntMap.elems dom.kickMarkConvs) <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [1 .. 
phase])))
                    expectedConvs =
                      expectedConvsFrom domainAConvs
                        <> expectedConvsFrom domainBConvs
@@ -1305,6 +1314,7 @@ testMigrationToPostgresJustProteus = do
     kickMelConvs <- forPhase $ createTestConv creatorC tid (mel : others)
     kickMarkConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others)
     delConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others)
+    addMelConvs <- forPhase $ createTestConv creatorC tid others
     pure $ TestConvList {..}
 
   createTestConv :: (HasCallStack) => Value -> String -> [Value] -> App ConvId
@@ -1336,6 +1342,9 @@ testMigrationToPostgresJustProteus = do
       forConcurrently_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do
         retry500Once (deleteTeamConversation tid convId convAdmin) >>= assertSuccess
 
+      forConcurrently_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do
+        retry500Once (addMembers convAdmin convId (def {users = [mel]})) >>= assertSuccess
+
       void $ awaitNMatches n isConvDeleteNotif melWS
@@ -1362,7 +1375,8 @@ data TestConvList = TestConvList
   { unmodifiedConvs :: [ConvId],
     kickMelConvs :: IntMap [ConvId],
     kickMarkConvs :: IntMap [ConvId],
-    delConvs :: IntMap [ConvId]
+    delConvs :: IntMap [ConvId],
+    addMelConvs :: IntMap [ConvId]
   }
 
 instance ToJSON TestConvList where
@@ -1370,8 +1384,9 @@ instance ToJSON TestConvList where
     object
       [ fromString "unmodifiedConvs" .= (mkId <$> convList.unmodifiedConvs),
         fromString "kickMelConvs" .= (mkId <$$> convList.kickMelConvs),
-        fromString "kickMarkConvs" .= (mkId <$$> convList.kickMelConvs),
-        fromString "delConvs" .= (mkId <$$> convList.delConvs)
+        fromString "kickMarkConvs" .= (mkId <$$> convList.kickMarkConvs),
+        fromString "delConvs" .= (mkId <$$> convList.delConvs),
+        fromString "addMelConvs" .= (mkId <$$> convList.addMelConvs)
       ]
     where
       mkId :: ConvId -> String
@@ -1383,5 +1398,6 @@ instance Semigroup TestConvList where
       { unmodifiedConvs = l1.unmodifiedConvs <> l2.unmodifiedConvs,
         kickMelConvs = IntMap.unionWith (<>) l1.kickMelConvs l2.kickMelConvs,
         kickMarkConvs = IntMap.unionWith (<>) l1.kickMarkConvs l2.kickMarkConvs,
-        delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs
+        delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs,
+        addMelConvs = IntMap.unionWith (<>) l1.addMelConvs l2.addMelConvs
       }
From a4f1c32c41308bbf03837c76b6ec07f944f1fc0b Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 28 Oct 2025 12:35:44 +0100
Subject: [PATCH 24/49] ConversationStore.Postgres: Fix bug in ordering conv
 ids

---
 libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
index 636c3bdd2f..2b2028aa66 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
@@ -296,7 +296,7 @@ getLocalConversationIdsImpl usr start (fromRange -> maxIds) = do
       [vectorStatement|SELECT (conv :: uuid)
                        FROM conversation_member
                        WHERE "user" = ($1 :: uuid)
-                       AND conv > ($2 :: uuid)
+                       AND (uuid_extract_version(conv), conv) > (uuid_extract_version($2 :: uuid), $2 :: uuid)
                        ORDER BY uuid_extract_version(conv), conv
                        LIMIT ($3 :: integer)
                        |]
From 9ba7f5602ee83e8df8f4443d0b546a718d145591 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Tue, 28 Oct 2025 12:36:46 +0100
Subject: [PATCH 25/49] ConversationStore.Cassandra: Sort results explicitly,
 nubOrd doesn't sort them

---
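For context: `nubOrd` from `Data.Containers.ListUtils` only deduplicates and keeps first-occurrence order, e.g. `nubOrd [3,1,2,1] == [3,1,2]`, so the union of the Postgres and Cassandra pages still needs an explicit sort afterwards. A minimal sketch of the intended merge, with the key abstracted out (in the change below it is the (UUID version, id) pair from the previous patches):

  import Data.Containers.ListUtils (nubOrd)
  import Data.List (sortOn)

  -- Deduplicate the union of two pages, then restore a total order.
  mergePages :: (Ord a, Ord k) => (a -> k) -> [a] -> [a] -> [a]
  mergePages key pg cass = sortOn key (nubOrd (pg <> cass))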
libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index 9e817efd74..409ef4a6fb 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -1108,7 +1108,9 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case cassConvIds <- embedClient client $ getLocalConvIds uid start maxIds pgConvIds <- interpretConversationStoreToPostgres $ ConvStore.getLocalConversationIds uid start maxIds - let allResults = List.nubOrdBy (\id1 id2 -> (UUID.version $ toUUID id1, id1) `compare` (UUID.version $ toUUID id2, id2)) (pgConvIds.resultSetResult <> cassConvIds.resultSetResult) + let allResults = + sortOn (\cid -> (UUID.version $ toUUID cid, cid)) $ + List.nubOrd (pgConvIds.resultSetResult <> cassConvIds.resultSetResult) maxIdsInt = (fromIntegral $ fromRange maxIds) pure $ ResultSet From 33dac70ee14d04ae7ff636bc57a8a10356e717fe Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 28 Oct 2025 12:42:54 +0100 Subject: [PATCH 26/49] integration: Create new convs in every phase --- integration/test/Test/Conversation.hs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/integration/test/Test/Conversation.hs b/integration/test/Test/Conversation.hs index b488e6ba4d..380a1788fd 100644 --- a/integration/test/Test/Conversation.hs +++ b/integration/test/Test/Conversation.hs @@ -1268,6 +1268,7 @@ testMigrationToPostgresJustProteus = do domainMConvs <- createTestConvs mia miaTid mel mark [] pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) + newConvsRef <- newIORef [] addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) $ addJSONToFailureContext "otherMelConvs" otherMelConvs @@ -1276,9 +1277,12 @@ testMigrationToPostgresJustProteus = do runPhase phase = do putStrLn $ "----------> Start phase: " <> show phase runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do - runPhaseOperations phase alice aliceTid domainAConvs mel mark - runPhaseOperations phase bob bobTid domainBConvs mel mark - runPhaseOperations phase mia miaTid domainMConvs mel mark + newDomainAConvs <- runPhaseOperations phase alice aliceTid domainAConvs mel mark + newDomainBConvs <- runPhaseOperations phase bob bobTid domainBConvs mel mark + newDomainCConvs <- runPhaseOperations phase mia miaTid domainMConvs mel mark + let newConvs = newDomainAConvs <> newDomainBConvs <> newDomainCConvs + modifyIORef newConvsRef (newConvs <>) + allNewConvs <- readIORef newConvsRef actualConvs <- getAllConvIds mel n let expectedConvsFrom dom = dom.unmodifiedConvs @@ -1290,6 +1294,7 @@ testMigrationToPostgresJustProteus = do expectedConvsFrom domainAConvs <> expectedConvsFrom domainBConvs <> expectedConvsFrom domainMConvs + <> allNewConvs actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) @@ -1306,6 +1311,7 @@ testMigrationToPostgresJustProteus = do -- 2. Convs that will kick mel in each phase -- 3. Convs that will kick mark in each phase -- 4. Convs that will be deleted in each phase + -- 5. 
Convs that will add mel in each phase
   createTestConvs :: (HasCallStack) => Value -> String -> Value -> Value -> [Value] -> App TestConvList
   createTestConvs creatorC tid mel mark others = do
@@ -1342,7 +1352,7 @@ testMigrationToPostgresJustProteus = do
         then action
         else pure resp
 
-  runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App ()
+  runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App [ConvId]
   runPhaseOperations phase convAdmin tid TestConvList {..} mel mark = do
     withWebSocket mel $ \melWS -> do
       forConcurrently_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do
@@ -1362,6 +1372,8 @@ testMigrationToPostgresJustProteus = do
         retry500Once (addMembers convAdmin convId (def {users = [mel]})) >>= assertSuccess
 
       void $ awaitNMatches n isConvDeleteNotif melWS
+      replicateConcurrently n
+        $ createTestConv convAdmin tid [mel]
 
   waitForMigration :: (HasCallStack) => String -> App ()
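Because `startDynamicBackend` tears the dynamic backend down after every phase, conversations created during a phase have to be remembered across iterations; the hunks above thread them through an `IORef`. A self-contained sketch of that accumulation pattern (element type simplified to `Int`):

  import Data.Foldable (for_)
  import Data.IORef

  -- Run several phases, collecting what each one produces in a
  -- single mutable cell, and read the combined list back at the end.
  collectPhases :: [IO [Int]] -> IO [Int]
  collectPhases phases = do
    acc <- newIORef []
    for_ phases $ \runPhase -> do
      new <- runPhase
      modifyIORef acc (new <>)
    readIORef acc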
Background worker is configured to not do anything, galley is configured to only use PG --- --- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. --- --- The tests are from the perspective of mel, a user on the dynamic backend, --- called backendM (migraing backend). There are also users called mark and mia --- on this backend. --- --- TODO: --- Also create convs and send messages in all phases -testMigrationToPostgresMLS :: App () -testMigrationToPostgresMLS = do - resourcePool <- asks (.resourcePool) - (alice, aliceTid, _) <- createTeam OwnDomain 1 - (bob, bobTid, _) <- createTeam OtherDomain 1 - [aliceC, bobC] <- traverse (createMLSClient def) [alice, bob] - - let phase1Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "cassandra", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase2Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase3Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" True - } - phase4Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase5Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phaseOverrides = - IntMap.fromList - [ (1, phase1Overrides), - (2, phase2Overrides), - (3, phase3Overrides), - (4, phase4Overrides), - (5, phase5Overrides) - ] - runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do - let domainM = migratingBackend.berDomain - (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do - [mel, mark] <- createUsers [domainM, domainM] - (mia, miaTid, _) <- createTeam domainM 1 - [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia] - connectUsers [alice, bob, mel, mark, mia] - otherMelConvs <- getAllConvIds mel 100 - - domainAConvs <- createTestConvs aliceC aliceTid melC markC [] - domainBConvs <- createTestConvs bobC bobTid melC markC [] - domainMConvs <- createTestConvs miaC miaTid melC markC [] - pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) - - addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] - $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) - $ addJSONToFailureContext "otherMelConvs" otherMelConvs - $ do - let runPhase :: (HasCallStack) => Int -> App () - runPhase phase = do - putStrLn $ "----------> Start phase: " <> show phase - runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do - runPhaseOperations phase aliceC aliceTid domainAConvs melC markC - runPhaseOperations phase bobC bobTid domainBConvs melC markC - runPhaseOperations phase miaC miaTid domainMConvs melC markC - actualConvs <- getAllConvIds mel n - let expectedConvsFrom dom = - dom.unmodifiedConvs - <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 
5]))) - <> concat (IntMap.elems dom.kickMarkConvs) - <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) - <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [(phase + 1) .. 5]))) - expectedConvs = - expectedConvsFrom domainAConvs - <> expectedConvsFrom domainBConvs - <> expectedConvsFrom domainMConvs - - actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) - - when (phase == 3) $ waitForMigration domainM - runPhase 1 - runPhase 2 - runPhase 3 - runPhase 4 - runPhase 5 - where - n = 1 - -- Creates n convs of these types: - -- 1. Convs that will exist unmodified during the test - -- 2. Convs that will kick mel in each phase - -- 3. Convs that will kick mark in each phase - -- 4. Convs that will be deleted in each phase - createTestConvs :: (HasCallStack) => ClientIdentity -> String -> ClientIdentity -> ClientIdentity -> [ClientIdentity] -> App TestConvList - createTestConvs creatorC tid melC markC othersC = do - unmodifiedConvs <- replicateM n $ do - createTestConv creatorC tid (melC : markC : othersC) - - kickMelConvs <- forPhase $ createTestConv creatorC tid (melC : othersC) - kickMarkConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) - delConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) - addMelConvs <- forPhase $ createTestConv creatorC tid othersC - pure $ TestConvList {..} - - createTestConv :: (HasCallStack) => ClientIdentity -> String -> [ClientIdentity] -> App ConvId - createTestConv creatorC tid membersC = do - conv <- createNewGroupWith def creatorC defMLS {team = Just tid} - traverse_ (uploadNewKeyPackage def) membersC - void $ createAddCommit creatorC conv ((.qualifiedUserId) <$> membersC) >>= sendAndConsumeCommitBundle - pure conv - - forPhase :: App a -> App (IntMap [a]) - forPhase action = - fmap IntMap.fromList . for [1 .. 
5] $ \phase -> do - convs <- replicateM n $ action - pure (phase, convs) - - runPhaseOperations :: (HasCallStack) => Int -> ClientIdentity -> String -> TestConvList -> ClientIdentity -> ClientIdentity -> App () - runPhaseOperations phase convAdmin tid TestConvList {..} melC markC = do - for_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do - mp <- createRemoveCommit convAdmin convId [melC] - void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 - - for_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do - mp <- createRemoveCommit convAdmin convId [markC] - void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 - - for_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do - deleteTeamConversation tid convId convAdmin >>= assertSuccess - getConversation convAdmin convId `bindResponse` \resp -> - resp.status `shouldMatchInt` 404 - - for_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do - void $ uploadNewKeyPackage def melC - void $ createAddCommit convAdmin convId [melC.qualifiedUserId] >>= sendAndConsumeCommitBundle - - waitForMigration :: (HasCallStack) => String -> App () - waitForMigration domainM = do - metrics <- - getMetrics domainM BackgroundWorker `bindResponse` \resp -> do - resp.status `shouldMatchInt` 200 - pure $ Text.decodeUtf8 resp.body - let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do - liftIO $ threadDelay 100_000 - waitForMigration domainM - --- | The migration has these phases. --- 1. Write to cassandra (before any migration activity) --- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra) --- 3. Backgound worker starts migration --- 4. Background worker finishes migration, galley is still configured to think migration is on going --- 5. Background worker is configured to not do anything, galley is configured to only use PG --- --- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. --- --- The tests are from the perspective of mel, a user on the dynamic backend, --- called backendM (migraing backend). There are also users called mark and mia --- on this backend. 
--- --- TODO: --- Also create convs and send messages in all phases -testMigrationToPostgresJustProteus :: App () -testMigrationToPostgresJustProteus = do - resourcePool <- asks (.resourcePool) - (alice, aliceTid, _) <- createTeam OwnDomain 1 - (bob, bobTid, _) <- createTeam OtherDomain 1 - - let phase1Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "cassandra", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase2Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase3Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" True - } - phase4Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase5Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phaseOverrides = - IntMap.fromList - [ (1, phase1Overrides), - (2, phase2Overrides), - (3, phase3Overrides), - (4, phase4Overrides), - (5, phase5Overrides) - ] - runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do - let domainM = migratingBackend.berDomain - (mel, _melC, mark, _markC, mia, _miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do - [mel, mark] <- createUsers [domainM, domainM] - (mia, miaTid, _) <- createTeam domainM 1 - [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia] - connectUsers [alice, bob, mel, mark, mia] - otherMelConvs <- getAllConvIds mel 100 - - domainAConvs <- createTestConvs alice aliceTid mel mark [] - domainBConvs <- createTestConvs bob bobTid mel mark [] - domainMConvs <- createTestConvs mia miaTid mel mark [] - pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) - - newConvsRef <- newIORef [] - addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] - $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) - $ addJSONToFailureContext "otherMelConvs" otherMelConvs - $ do - let runPhase :: (HasCallStack) => Int -> App () - runPhase phase = do - putStrLn $ "----------> Start phase: " <> show phase - runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do - newDomainAConvs <- runPhaseOperations phase alice aliceTid domainAConvs mel mark - newDomainBConvs <- runPhaseOperations phase bob bobTid domainBConvs mel mark - newDomainCConvs <- runPhaseOperations phase mia miaTid domainMConvs mel mark - let newConvs = newDomainAConvs <> newDomainBConvs <> newDomainCConvs - modifyIORef newConvsRef (newConvs <>) - allNewConvs <- readIORef newConvsRef - actualConvs <- getAllConvIds mel n - let expectedConvsFrom dom = - dom.unmodifiedConvs - <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) - <> concat (IntMap.elems dom.kickMarkConvs) - <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) - <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [1 .. 
phase]))) - expectedConvs = - expectedConvsFrom domainAConvs - <> expectedConvsFrom domainBConvs - <> expectedConvsFrom domainMConvs - <> allNewConvs - - actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) - - when (phase == 3) $ waitForMigration domainM - runPhase 1 - runPhase 2 - runPhase 3 - runPhase 4 - runPhase 5 - where - n = 10 - -- Creates n convs of these types: - -- 1. Convs that will exist unmodified during the test - -- 2. Convs that will kick mel in each phase - -- 3. Convs that will kick mark in each phase - -- 4. Convs that will be deleted in each phase - -- 5. Convs that will add mel in each phase - createTestConvs :: (HasCallStack) => Value -> String -> Value -> Value -> [Value] -> App TestConvList - createTestConvs creatorC tid mel mark others = do - unmodifiedConvs <- replicateConcurrently n $ do - createTestConv creatorC tid (mel : mark : others) - - kickMelConvs <- forPhase $ createTestConv creatorC tid (mel : others) - kickMarkConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) - delConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) - addMelConvs <- forPhase $ createTestConv creatorC tid others - pure $ TestConvList {..} - - createTestConv :: (HasCallStack) => Value -> String -> [Value] -> App ConvId - createTestConv creator tid members = do - postConversation creator defProteus {team = Just tid, qualifiedUsers = members} - >>= getJSON 201 - >>= objConvId - - forPhase :: App a -> App (IntMap [a]) - forPhase action = - fmap IntMap.fromList . forConcurrently [1 .. 5] $ \phase -> do - convs <- replicateM n $ action - pure (phase, convs) - - retry500Once :: App Response -> App Response - retry500Once action = do - action `bindResponse` \resp -> do - if resp.status == 500 - then action - else pure resp - - runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App [ConvId] - runPhaseOperations phase convAdmin tid TestConvList {..} mel mark = do - withWebSocket mel $ \melWS -> do - forConcurrently_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do - retry500Once (removeMember convAdmin convId mel) >>= assertSuccess - - void $ awaitNMatches n isConvLeaveNotif melWS - - forConcurrently_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do - retry500Once (removeMember convAdmin convId mark) >>= assertSuccess - - void $ awaitNMatches n isConvLeaveNotif melWS - - forConcurrently_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do - retry500Once (deleteTeamConversation tid convId convAdmin) >>= assertSuccess - - forConcurrently_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do - retry500Once (addMembers convAdmin convId (def {users = [mel]})) >>= assertSuccess - - void $ awaitNMatches n isConvDeleteNotif melWS - replicateConcurrently n - $ createTestConv convAdmin tid [mel] - - waitForMigration :: (HasCallStack) => String -> App () - waitForMigration domainM = do - metrics <- - getMetrics domainM BackgroundWorker `bindResponse` \resp -> do - resp.status `shouldMatchInt` 200 - pure $ Text.decodeUtf8 resp.body - let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do - liftIO $ 
threadDelay 100_000 - waitForMigration domainM - --- Test Helpers - -data TestConvList = TestConvList - { unmodifiedConvs :: [ConvId], - kickMelConvs :: IntMap [ConvId], - kickMarkConvs :: IntMap [ConvId], - delConvs :: IntMap [ConvId], - addMelConvs :: IntMap [ConvId] - } - -instance ToJSON TestConvList where - toJSON convList = do - object - [ fromString "unmodifiedConvs" .= (mkId <$> convList.unmodifiedConvs), - fromString "kickMelConvs" .= (mkId <$$> convList.kickMelConvs), - fromString "kickMarkConvs" .= (mkId <$$> convList.kickMarkConvs), - fromString "delConvs" .= (mkId <$$> convList.delConvs), - fromString "addMelConvs" .= (mkId <$$> convList.addMelConvs) - ] - where - mkId :: ConvId -> String - mkId cid = cid.id_ <> "@" <> cid.domain - -instance Semigroup TestConvList where - l1 <> l2 = - TestConvList - { unmodifiedConvs = l1.unmodifiedConvs <> l2.unmodifiedConvs, - kickMelConvs = IntMap.unionWith (<>) l1.kickMelConvs l2.kickMelConvs, - kickMarkConvs = IntMap.unionWith (<>) l1.kickMarkConvs l2.kickMarkConvs, - delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs, - addMelConvs = IntMap.unionWith (<>) l1.addMelConvs l2.addMelConvs - } diff --git a/integration/test/Test/Conversation/Migration.hs b/integration/test/Test/Conversation/Migration.hs new file mode 100644 index 0000000000..659ec55b6e --- /dev/null +++ b/integration/test/Test/Conversation/Migration.hs @@ -0,0 +1,398 @@ +module Test.Conversation.Migration where + +import API.Galley +import Control.Applicative +import Control.Concurrent (threadDelay) +import Control.Monad.Codensity +import Control.Monad.Reader +import Data.IntMap (IntMap) +import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import Data.Text (Text) +import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text +import GHC.Stack +import MLS.Util +import Notifications +import SetupHelpers hiding (deleteUser) +import Testlib.Prelude +import Testlib.ResourcePool +import Text.Regex.TDFA ((=~)) +import UnliftIO + +-- | The migration has these phases. +-- 1. Write to cassandra (before any migration activity) +-- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra) +-- 3. Backgound worker starts migration +-- 4. Background worker finishes migration, galley is still configured to think migration is on going +-- 5. Background worker is configured to not do anything, galley is configured to only use PG +-- +-- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. +-- +-- The tests are from the perspective of mel, a user on the dynamic backend, +-- called backendM (migraing backend). There are also users called mark and mia +-- on this backend. 
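+--
+-- As a quick reference, the per-phase configuration exercised by these tests
+-- (mirroring the override values defined below; the tabular layout here is
+-- illustrative only) is:
+--
+-- @
+-- phase  galley postgresMigration.conversation  background-worker migrateConversations
+-- 1      cassandra                              False
+-- 2      migration-to-postgresql                False
+-- 3      migration-to-postgresql                True
+-- 4      migration-to-postgresql                False
+-- 5      postgresql                             False
+-- @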
+-- +-- TODO: +-- Also create convs and send messages in all phases +testMigrationToPostgresMLS :: App () +testMigrationToPostgresMLS = do + resourcePool <- asks (.resourcePool) + (alice, aliceTid, _) <- createTeam OwnDomain 1 + (bob, bobTid, _) <- createTeam OtherDomain 1 + [aliceC, bobC] <- traverse (createMLSClient def) [alice, bob] + + let phase1Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "cassandra", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase2Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase3Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" True + } + phase4Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase5Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phaseOverrides = + IntMap.fromList + [ (1, phase1Overrides), + (2, phase2Overrides), + (3, phase3Overrides), + (4, phase4Overrides), + (5, phase5Overrides) + ] + runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do + let domainM = migratingBackend.berDomain + (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do + [mel, mark] <- createUsers [domainM, domainM] + (mia, miaTid, _) <- createTeam domainM 1 + [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia] + connectUsers [alice, bob, mel, mark, mia] + otherMelConvs <- getAllConvIds mel 100 + + domainAConvs <- createTestConvs aliceC aliceTid melC markC [] + domainBConvs <- createTestConvs bobC bobTid melC markC [] + domainMConvs <- createTestConvs miaC miaTid melC markC [] + pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) + + addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] + $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) + $ addJSONToFailureContext "otherMelConvs" otherMelConvs + $ do + let runPhase :: (HasCallStack) => Int -> App () + runPhase phase = do + putStrLn $ "----------> Start phase: " <> show phase + runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do + runPhaseOperations phase aliceC aliceTid domainAConvs melC markC + runPhaseOperations phase bobC bobTid domainBConvs melC markC + runPhaseOperations phase miaC miaTid domainMConvs melC markC + actualConvs <- getAllConvIds mel n + let expectedConvsFrom dom = + dom.unmodifiedConvs + <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems dom.kickMarkConvs) + <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [(phase + 1) .. 
5]))) + expectedConvs = + expectedConvsFrom domainAConvs + <> expectedConvsFrom domainBConvs + <> expectedConvsFrom domainMConvs + + actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) + + when (phase == 3) $ waitForMigration domainM + runPhase 1 + runPhase 2 + runPhase 3 + runPhase 4 + runPhase 5 + where + n = 1 + -- Creates n convs of these types: + -- 1. Convs that will exist unmodified during the test + -- 2. Convs that will kick mel in each phase + -- 3. Convs that will kick mark in each phase + -- 4. Convs that will be deleted in each phase + createTestConvs :: (HasCallStack) => ClientIdentity -> String -> ClientIdentity -> ClientIdentity -> [ClientIdentity] -> App TestConvList + createTestConvs creatorC tid melC markC othersC = do + unmodifiedConvs <- replicateM n $ do + createTestConv creatorC tid (melC : markC : othersC) + + kickMelConvs <- forPhase $ createTestConv creatorC tid (melC : othersC) + kickMarkConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) + delConvs <- forPhase $ createTestConv creatorC tid (melC : markC : othersC) + addMelConvs <- forPhase $ createTestConv creatorC tid othersC + pure $ TestConvList {..} + + createTestConv :: (HasCallStack) => ClientIdentity -> String -> [ClientIdentity] -> App ConvId + createTestConv creatorC tid membersC = do + conv <- createNewGroupWith def creatorC defMLS {team = Just tid} + traverse_ (uploadNewKeyPackage def) membersC + void $ createAddCommit creatorC conv ((.qualifiedUserId) <$> membersC) >>= sendAndConsumeCommitBundle + pure conv + + forPhase :: App a -> App (IntMap [a]) + forPhase action = + fmap IntMap.fromList . for [1 .. 5] $ \phase -> do + convs <- replicateM n $ action + pure (phase, convs) + + runPhaseOperations :: (HasCallStack) => Int -> ClientIdentity -> String -> TestConvList -> ClientIdentity -> ClientIdentity -> App () + runPhaseOperations phase convAdmin tid TestConvList {..} melC markC = do + for_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do + mp <- createRemoveCommit convAdmin convId [melC] + void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 + + for_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do + mp <- createRemoveCommit convAdmin convId [markC] + void $ postMLSCommitBundle mp.sender (mkBundle mp) >>= getJSON 201 + + for_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do + deleteTeamConversation tid convId convAdmin >>= assertSuccess + getConversation convAdmin convId `bindResponse` \resp -> + resp.status `shouldMatchInt` 404 + + for_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do + void $ uploadNewKeyPackage def melC + void $ createAddCommit convAdmin convId [melC.qualifiedUserId] >>= sendAndConsumeCommitBundle + + waitForMigration :: (HasCallStack) => String -> App () + waitForMigration domainM = do + metrics <- + getMetrics domainM BackgroundWorker `bindResponse` \resp -> do + resp.status `shouldMatchInt` 200 + pure $ Text.decodeUtf8 resp.body + let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do + liftIO $ threadDelay 100_000 + waitForMigration domainM + +-- | The migration has these phases. +-- 1. 
Write to cassandra (before any migration activity) +-- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra) +-- 3. Backgound worker starts migration +-- 4. Background worker finishes migration, galley is still configured to think migration is on going +-- 5. Background worker is configured to not do anything, galley is configured to only use PG +-- +-- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. +-- +-- The tests are from the perspective of mel, a user on the dynamic backend, +-- called backendM (migraing backend). There are also users called mark and mia +-- on this backend. +-- +-- TODO: +-- Also create convs and send messages in all phases +testMigrationToPostgresJustProteus :: App () +testMigrationToPostgresJustProteus = do + resourcePool <- asks (.resourcePool) + (alice, aliceTid, _) <- createTeam OwnDomain 1 + (bob, bobTid, _) <- createTeam OtherDomain 1 + + let phase1Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "cassandra", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase2Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase3Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" True + } + phase4Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phase5Overrides = + def + { galleyCfg = setField "postgresMigration.conversation" "postgresql", + backgroundWorkerCfg = setField "migrateConversations" False + } + phaseOverrides = + IntMap.fromList + [ (1, phase1Overrides), + (2, phase2Overrides), + (3, phase3Overrides), + (4, phase4Overrides), + (5, phase5Overrides) + ] + runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do + let domainM = migratingBackend.berDomain + (mel, _melC, mark, _markC, mia, _miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do + [mel, mark] <- createUsers [domainM, domainM] + (mia, miaTid, _) <- createTeam domainM 1 + [melC, markC, miaC] <- traverse (createMLSClient def) [mel, mark, mia] + connectUsers [alice, bob, mel, mark, mia] + otherMelConvs <- getAllConvIds mel 100 + + -- Other convs + pooledReplicateConcurrentlyN_ 32 500 $ createTestConv mia miaTid [] + pooledReplicateConcurrentlyN_ 32 500 $ createTestConv alice aliceTid [mia] + pooledReplicateConcurrentlyN_ 32 500 $ createTestConv bob bobTid [mia] + + domainAConvs <- createTestConvs alice aliceTid mel mark [] + domainBConvs <- createTestConvs bob bobTid mel mark [] + domainMConvs <- createTestConvs mia miaTid mel mark [] + pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) + + newConvsRef <- newIORef [] + addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)] + $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs) + $ addJSONToFailureContext "otherMelConvs" otherMelConvs + $ do + let runPhase :: (HasCallStack) => Int -> App () + runPhase phase = do + putStrLn $ "----------> Start phase: " <> show phase + runCodensity (startDynamicBackend migratingBackend (phaseOverrides 
IntMap.! phase)) $ \_ -> do + newDomainAConvs <- runPhaseOperations phase alice aliceTid domainAConvs mel mark + newDomainBConvs <- runPhaseOperations phase bob bobTid domainBConvs mel mark + newDomainCConvs <- runPhaseOperations phase mia miaTid domainMConvs mel mark + let newConvs = newDomainAConvs <> newDomainBConvs <> newDomainCConvs + modifyIORef newConvsRef (newConvs <>) + allNewConvs <- readIORef newConvsRef + actualConvs <- getAllConvIds mel n + let expectedConvsFrom dom = + dom.unmodifiedConvs + <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems dom.kickMarkConvs) + <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [1 .. phase]))) + expectedConvs = + expectedConvsFrom domainAConvs + <> expectedConvsFrom domainBConvs + <> expectedConvsFrom domainMConvs + <> allNewConvs + + actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) + + when (phase == 3) $ waitForMigration domainM + runPhase 1 + runPhase 2 + runPhase 3 + runPhase 4 + runPhase 5 + where + n = 1 + -- Creates n convs of these types: + -- 1. Convs that will exist unmodified during the test + -- 2. Convs that will kick mel in each phase + -- 3. Convs that will kick mark in each phase + -- 4. Convs that will be deleted in each phase + -- 5. Convs that will add mel in each phase + createTestConvs :: (HasCallStack) => Value -> String -> Value -> Value -> [Value] -> App TestConvList + createTestConvs creatorC tid mel mark others = do + unmodifiedConvs <- replicateConcurrently n $ do + createTestConv creatorC tid (mel : mark : others) + + kickMelConvs <- forPhase $ createTestConv creatorC tid (mel : others) + kickMarkConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) + delConvs <- forPhase $ createTestConv creatorC tid (mel : mark : others) + addMelConvs <- forPhase $ createTestConv creatorC tid others + pure $ TestConvList {..} + + createTestConv :: (HasCallStack) => Value -> String -> [Value] -> App ConvId + createTestConv creator tid members = do + postConversation creator defProteus {team = Just tid, qualifiedUsers = members} + >>= getJSON 201 + >>= objConvId + + forPhase :: App a -> App (IntMap [a]) + forPhase action = + fmap IntMap.fromList . forConcurrently [1 .. 
5] $ \phase -> do + convs <- replicateM n $ action + pure (phase, convs) + + retry500Once :: App Response -> App Response + retry500Once action = do + action `bindResponse` \resp -> do + if resp.status == 500 + then action + else pure resp + + runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App [ConvId] + runPhaseOperations phase convAdmin tid TestConvList {..} mel mark = do + withWebSocket mel $ \melWS -> do + forConcurrently_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do + retry500Once (removeMember convAdmin convId mel) >>= assertSuccess + + void $ awaitNMatches n isConvLeaveNotif melWS + + forConcurrently_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do + retry500Once (removeMember convAdmin convId mark) >>= assertSuccess + + void $ awaitNMatches n isConvLeaveNotif melWS + + forConcurrently_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do + retry500Once (deleteTeamConversation tid convId convAdmin) >>= assertSuccess + + forConcurrently_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do + retry500Once (addMembers convAdmin convId (def {users = [mel]})) >>= assertSuccess + + void $ awaitNMatches n isConvDeleteNotif melWS + replicateConcurrently n + $ createTestConv convAdmin tid [mel] + + waitForMigration :: (HasCallStack) => String -> App () + waitForMigration domainM = do + metrics <- + getMetrics domainM BackgroundWorker `bindResponse` \resp -> do + resp.status `shouldMatchInt` 200 + pure $ Text.decodeUtf8 resp.body + let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") + when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do + liftIO $ threadDelay 100_000 + waitForMigration domainM + +-- Test Helpers + +data TestConvList = TestConvList + { unmodifiedConvs :: [ConvId], + kickMelConvs :: IntMap [ConvId], + kickMarkConvs :: IntMap [ConvId], + delConvs :: IntMap [ConvId], + addMelConvs :: IntMap [ConvId] + } + +instance ToJSON TestConvList where + toJSON convList = do + object + [ fromString "unmodifiedConvs" .= (mkId <$> convList.unmodifiedConvs), + fromString "kickMelConvs" .= (mkId <$$> convList.kickMelConvs), + fromString "kickMarkConvs" .= (mkId <$$> convList.kickMarkConvs), + fromString "delConvs" .= (mkId <$$> convList.delConvs), + fromString "addMelConvs" .= (mkId <$$> convList.addMelConvs) + ] + where + mkId :: ConvId -> String + mkId cid = cid.id_ <> "@" <> cid.domain + +instance Semigroup TestConvList where + l1 <> l2 = + TestConvList + { unmodifiedConvs = l1.unmodifiedConvs <> l2.unmodifiedConvs, + kickMelConvs = IntMap.unionWith (<>) l1.kickMelConvs l2.kickMelConvs, + kickMarkConvs = IntMap.unionWith (<>) l1.kickMarkConvs l2.kickMarkConvs, + delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs, + addMelConvs = IntMap.unionWith (<>) l1.addMelConvs l2.addMelConvs + } From 0265b3202d27b10acd7108fb7d8c1d794feeceba Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 29 Oct 2025 09:17:49 +0100 Subject: [PATCH 28/49] wire-subsystems: Move PGConstraints to Wire.Postgres --- .../src/Wire/ConversationStore/Cassandra.hs | 3 ++- .../ConversationStore/Migration/Cleanup.hs | 1 - .../Wire/ConversationStore/MigrationLock.hs | 2 +- .../src/Wire/ConversationStore/Postgres.hs | 6 ------ 
libs/wire-subsystems/src/Wire/Postgres.hs | 20 ++++++++++--------- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index 409ef4a6fb..e1fb75ae87 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -77,7 +77,8 @@ import Wire.ConversationStore.Cassandra.Queries qualified as Queries import Wire.ConversationStore.MLS.Types import Wire.ConversationStore.Migration.Cleanup import Wire.ConversationStore.MigrationLock -import Wire.ConversationStore.Postgres (PGConstraints, interpretConversationStoreToPostgres) +import Wire.ConversationStore.Postgres (interpretConversationStoreToPostgres) +import Wire.Postgres import Wire.Sem.Paging.Cassandra import Wire.StoredConversation import Wire.StoredConversation qualified as StoreConv diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs index 875a9742c0..aac51a697d 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs @@ -16,7 +16,6 @@ import Wire.API.PostgresMarshall import Wire.ConversationStore import Wire.ConversationStore.MLS.Types import Wire.ConversationStore.Migration.Types -import Wire.ConversationStore.Postgres import Wire.Postgres import Wire.StoredConversation import Wire.Util diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs index d77cd0e987..323122fef0 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/MigrationLock.hs @@ -19,7 +19,7 @@ import Polysemy.TinyLog (TinyLog) import Polysemy.TinyLog qualified as TinyLog import System.Logger.Message qualified as Log import Wire.API.PostgresMarshall -import Wire.ConversationStore.Postgres +import Wire.Postgres data LockType = -- | Used for migrating a conversation, will block any other locks diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs index 2b2028aa66..fb73fb7dec 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs @@ -46,12 +46,6 @@ import Wire.Sem.Paging.Cassandra import Wire.StoredConversation import Wire.UserList -type PGConstraints r = - ( Member (Input Hasql.Pool) r, - Member (Embed IO) r, - Member (Error Hasql.UsageError) r - ) - interpretConversationStoreToPostgres :: (PGConstraints r) => InterpreterFor ConversationStore r interpretConversationStoreToPostgres = interpret $ \case UpsertConversation lcnv nc -> upsertConversationImpl lcnv nc diff --git a/libs/wire-subsystems/src/Wire/Postgres.hs b/libs/wire-subsystems/src/Wire/Postgres.hs index d7516aff0c..0d7b180108 100644 --- a/libs/wire-subsystems/src/Wire/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/Postgres.hs @@ -24,6 +24,7 @@ module Wire.Postgres runTransaction, runPipeline, parseCount, + PGConstraints, -- * Query builder QueryFragment, @@ -55,6 +56,7 @@ import Hasql.Decoders qualified as Dec import Hasql.Encoders qualified as Enc import Hasql.Pipeline (Pipeline) import Hasql.Pool +import Hasql.Pool qualified as Hasql import Hasql.Session import Hasql.Statement 
 import Hasql.Transaction (Transaction)
@@ -66,11 +68,14 @@ import Polysemy.Error (Error, throw)
 import Polysemy.Input
 import Wire.API.Pagination
 
-runStatement ::
-  ( Member (Input Pool) r,
+type PGConstraints r =
+  ( Member (Input Hasql.Pool) r,
     Member (Embed IO) r,
-    Member (Error UsageError) r
-  ) =>
+    Member (Error Hasql.UsageError) r
+  )
+
+runStatement ::
+  (PGConstraints r) =>
   a ->
   Statement a b ->
   Sem r b
@@ -79,7 +84,7 @@ runStatement a stmt = do
   liftIO (use pool (statement a stmt)) >>= either throw pure
 
 runTransaction ::
-  (Member (Input Pool) r, Member (Embed IO) r, Member (Error UsageError) r) =>
+  (PGConstraints r) =>
   IsolationLevel ->
   Mode ->
   Transaction a ->
@@ -89,10 +94,7 @@ runTransaction isolationLevel mode t = do
   liftIO (use pool $ Transaction.transaction isolationLevel mode t) >>= either throw pure
 
 runPipeline ::
-  ( Member (Input Pool) r,
-    Member (Embed IO) r,
-    Member (Error UsageError) r
-  ) =>
+  (PGConstraints r) =>
   Pipeline a ->
   Sem r a
 runPipeline p = do

From 045c334dbd3d39e59e0fb9e63045c195f263284f Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 29 Oct 2025 09:20:07 +0100
Subject: [PATCH 29/49] ConversationStore.Cassandra: Implement
 SearchConversations for the migration interpreter

It returns an empty list.
---
 .../wire-subsystems/src/Wire/ConversationStore/Cassandra.hs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
index e1fb75ae87..d14e0c7745 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs
@@ -1453,6 +1453,12 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case
       isConvInPostgres convId >>= \case
         False -> embedClient client $ deleteSubConversation convId subConvId
         True -> interpretConversationStoreToPostgres $ ConvStore.deleteSubConversation convId subConvId
+  SearchConversations _ -> do
+    -- In theory, it is possible to make this partially work. But we don't
+    -- have to worry much about it, as this interpreter is only used during
+    -- the transition.
+    logEffect "ConversationStore.SearchConversations"
+    pure []
   HaveRemoteConvs uids -> do
     logEffect "ConversationStore.DeleteSubConversation"
     withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do

From d0c27d4f30039207bd314b9ba2791d24dac2255f Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 29 Oct 2025 09:31:31 +0100
Subject: [PATCH 30/49] wire-subsystems: Fix small compiler errors

---
 .../src/Wire/ConversationStore/Migration.hs     |  3 +--
 .../src/Wire/ConversationStore/Postgres.hs      | 15 ---------------
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
index 22891084d0..78a1866c89 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs
@@ -43,13 +43,12 @@ import Wire.API.MLS.LeafNode
 import Wire.API.MLS.SubConversation
 import Wire.API.PostgresMarshall
 import Wire.API.Provider.Service
-import Wire.ConversationStore
+import Wire.ConversationStore hiding (pageSize)
 import Wire.ConversationStore.Cassandra (interpretConversationStoreToCassandra)
 import Wire.ConversationStore.MLS.Types
 import Wire.ConversationStore.Migration.Cleanup
 import Wire.ConversationStore.Migration.Types
 import Wire.ConversationStore.MigrationLock
-import Wire.ConversationStore.Postgres
 import Wire.Postgres
 import Wire.Sem.Logger.TinyLog (loggerToTinyLog)
 import Wire.Sem.Paging.Cassandra
diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
index fb73fb7dec..b94008682a 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs
@@ -689,21 +689,6 @@ mkLocalMember (cid, uid, mServiceId, mProviderId, msOtrMutedStatus, msOtrMutedRe
       }
   )
 
-mkLocalMemberRow :: ConvId -> LocalMember -> LocalMemberRow
-mkLocalMemberRow cid lm =
-  ( cid,
-    lm.id_,
-    _serviceRefId <$> lm.service,
-    _serviceRefProvider <$> lm.service,
-    lm.status.msOtrMutedStatus,
-    lm.status.msOtrMutedRef,
-    Just lm.status.msOtrArchived,
-    lm.status.msOtrArchivedRef,
-    Just lm.status.msHidden,
-    lm.status.msHiddenRef,
-    Just lm.convRoleName
-  )
-
 type RemoteMemberRow = (ConvId, Domain, UserId, RoleName)
 
 getRemoteMemberImpl :: (PGConstraints r) => ConvId -> Remote UserId -> Sem r (Maybe RemoteMember)

From 757571ad8543cfbe2d0a1844a21c7ef77ac5db86 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 29 Oct 2025 15:07:16 +0100
Subject: [PATCH 31/49] integration: Align the MLS and Proteus tests and clean
 them up

---
 .../test/Test/Conversation/Migration.hs       | 240 +++++++-----
 1 file changed, 89 insertions(+), 151 deletions(-)

diff --git a/integration/test/Test/Conversation/Migration.hs b/integration/test/Test/Conversation/Migration.hs
index 659ec55b6e..2e5b184f55 100644
--- a/integration/test/Test/Conversation/Migration.hs
+++ b/integration/test/Test/Conversation/Migration.hs
@@ -1,3 +1,15 @@
+-- | The migration has these phases.
+-- 1. Write to Cassandra (before any migration activity)
+-- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra)
+-- 3. Background worker starts migration
+-- 4. Background worker finishes migration, galley is still configured to think migration is ongoing
+-- 5. Background worker is configured to not do anything, galley is configured to only use PG
+--
+-- The comments and variable names call these phases by number, i.e. Phase1, Phase2, and so on.
+--
+-- The tests are from the perspective of mel, a user on the dynamic backend,
+-- called backendM (migrating backend). There are also users called mark and mia
+-- on this backend.
 module Test.Conversation.Migration where
 
 import API.Galley
@@ -20,21 +32,8 @@ import Testlib.ResourcePool
 import Text.Regex.TDFA ((=~))
 import UnliftIO
 
--- | The migration has these phases.
--- 1. Write to cassandra (before any migration activity)
--- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra)
--- 3. Backgound worker starts migration
--- 4. Background worker finishes migration, galley is still configured to think migration is on going
--- 5. Background worker is configured to not do anything, galley is configured to only use PG
---
--- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on.
---
--- The tests are from the perspective of mel, a user on the dynamic backend,
--- called backendM (migraing backend). There are also users called mark and mia
--- on this backend.
---
--- TODO:
--- Also create convs and send messages in all phases
+-- | Our test setup cannot process updates to many MLS convs concurrently, so we
+-- run this with only 1 conv per type per phase and use no concurrency.
 testMigrationToPostgresMLS :: App ()
 testMigrationToPostgresMLS = do
   resourcePool <- asks (.resourcePool)
@@ -42,39 +41,6 @@ testMigrationToPostgresMLS = do
   (bob, bobTid, _) <- createTeam OtherDomain 1
   [aliceC, bobC] <- traverse (createMLSClient def) [alice, bob]
 
-  let phase1Overrides =
-        def
-          { galleyCfg = setField "postgresMigration.conversation" "cassandra",
-            backgroundWorkerCfg = setField "migrateConversations" False
-          }
-      phase2Overrides =
-        def
-          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
-            backgroundWorkerCfg = setField "migrateConversations" False
-          }
-      phase3Overrides =
-        def
-          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
-            backgroundWorkerCfg = setField "migrateConversations" True
-          }
-      phase4Overrides =
-        def
-          { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
-            backgroundWorkerCfg = setField "migrateConversations" False
-          }
-      phase5Overrides =
-        def
-          { galleyCfg = setField "postgresMigration.conversation" "postgresql",
-            backgroundWorkerCfg = setField "migrateConversations" False
-          }
-      phaseOverrides =
-        IntMap.fromList
-          [ (1, phase1Overrides),
-            (2, phase2Overrides),
-            (3, phase3Overrides),
-            (4, phase4Overrides),
-            (5, phase5Overrides)
-          ]
   runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do
     let domainM = migratingBackend.berDomain
     (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do
@@ -89,28 +55,32 @@ testMigrationToPostgresMLS = do
       domainMConvs <- createTestConvs miaC miaTid melC markC []
       pure (mel, melC, mark, markC, mia, miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs)
 
+    newConvsRef <- newIORef []
    addUsersToFailureContext [("alice", alice), ("bob", bob), ("mel", mel), ("mark", mark), ("mia", mia)]
      $ addJSONToFailureContext "convIds" (domainAConvs <> domainBConvs <> domainMConvs)
      $ addJSONToFailureContext "otherMelConvs" otherMelConvs
      $ do
        let runPhase 
:: (HasCallStack) => Int -> App () runPhase phase = do - putStrLn $ "----------> Start phase: " <> show phase runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do - runPhaseOperations phase aliceC aliceTid domainAConvs melC markC - runPhaseOperations phase bobC bobTid domainBConvs melC markC - runPhaseOperations phase miaC miaTid domainMConvs melC markC + newDomainAConvs <- runPhaseOperations phase aliceC aliceTid domainAConvs melC markC + newDomainBConvs <- runPhaseOperations phase bobC bobTid domainBConvs melC markC + newDomainCConvs <- runPhaseOperations phase miaC miaTid domainMConvs melC markC + let newConvs = newDomainAConvs <> newDomainBConvs <> newDomainCConvs + modifyIORef newConvsRef (newConvs <>) + allNewConvs <- readIORef newConvsRef actualConvs <- getAllConvIds mel n let expectedConvsFrom dom = dom.unmodifiedConvs <> concat (IntMap.elems (IntMap.restrictKeys dom.kickMelConvs (IntSet.fromList [(phase + 1) .. 5]))) <> concat (IntMap.elems dom.kickMarkConvs) <> concat (IntMap.elems (IntMap.restrictKeys dom.delConvs (IntSet.fromList [(phase + 1) .. 5]))) - <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [(phase + 1) .. 5]))) + <> concat (IntMap.elems (IntMap.restrictKeys dom.addMelConvs (IntSet.fromList [1 .. phase]))) expectedConvs = expectedConvsFrom domainAConvs <> expectedConvsFrom domainBConvs <> expectedConvsFrom domainMConvs + <> allNewConvs actualConvs `shouldMatchSet` ((convIdToQidObject <$> expectedConvs) <> otherMelConvs) @@ -122,11 +92,6 @@ testMigrationToPostgresMLS = do runPhase 5 where n = 1 - -- Creates n convs of these types: - -- 1. Convs that will exist unmodified during the test - -- 2. Convs that will kick mel in each phase - -- 3. Convs that will kick mark in each phase - -- 4. 
Convs that will be deleted in each phase createTestConvs :: (HasCallStack) => ClientIdentity -> String -> ClientIdentity -> ClientIdentity -> [ClientIdentity] -> App TestConvList createTestConvs creatorC tid melC markC othersC = do unmodifiedConvs <- replicateM n $ do @@ -151,7 +116,7 @@ testMigrationToPostgresMLS = do convs <- replicateM n $ action pure (phase, convs) - runPhaseOperations :: (HasCallStack) => Int -> ClientIdentity -> String -> TestConvList -> ClientIdentity -> ClientIdentity -> App () + runPhaseOperations :: (HasCallStack) => Int -> ClientIdentity -> String -> TestConvList -> ClientIdentity -> ClientIdentity -> App [ConvId] runPhaseOperations phase convAdmin tid TestConvList {..} melC markC = do for_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do mp <- createRemoveCommit convAdmin convId [melC] @@ -170,72 +135,14 @@ testMigrationToPostgresMLS = do void $ uploadNewKeyPackage def melC void $ createAddCommit convAdmin convId [melC.qualifiedUserId] >>= sendAndConsumeCommitBundle - waitForMigration :: (HasCallStack) => String -> App () - waitForMigration domainM = do - metrics <- - getMetrics domainM BackgroundWorker `bindResponse` \resp -> do - resp.status `shouldMatchInt` 200 - pure $ Text.decodeUtf8 resp.body - let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do - liftIO $ threadDelay 100_000 - waitForMigration domainM + replicateM n $ createTestConv convAdmin tid [melC] --- | The migration has these phases. --- 1. Write to cassandra (before any migration activity) --- 2. Galley is prepared for migrations (new things created in PG, old things are in Cassandra) --- 3. Backgound worker starts migration --- 4. Background worker finishes migration, galley is still configured to think migration is on going --- 5. Background worker is configured to not do anything, galley is configured to only use PG --- --- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. --- --- The tests are from the perspective of mel, a user on the dynamic backend, --- called backendM (migraing backend). There are also users called mark and mia --- on this backend. 
--- --- TODO: --- Also create convs and send messages in all phases -testMigrationToPostgresJustProteus :: App () -testMigrationToPostgresJustProteus = do +testMigrationToPostgresProteus :: App () +testMigrationToPostgresProteus = do resourcePool <- asks (.resourcePool) (alice, aliceTid, _) <- createTeam OwnDomain 1 (bob, bobTid, _) <- createTeam OtherDomain 1 - let phase1Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "cassandra", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase2Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase3Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" True - } - phase4Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phase5Overrides = - def - { galleyCfg = setField "postgresMigration.conversation" "postgresql", - backgroundWorkerCfg = setField "migrateConversations" False - } - phaseOverrides = - IntMap.fromList - [ (1, phase1Overrides), - (2, phase2Overrides), - (3, phase3Overrides), - (4, phase4Overrides), - (5, phase5Overrides) - ] runCodensity (acquireResources 1 resourcePool) $ \[migratingBackend] -> do let domainM = migratingBackend.berDomain (mel, _melC, mark, _markC, mia, _miaC, miaTid, domainAConvs, domainBConvs, domainMConvs, otherMelConvs) <- runCodensity (startDynamicBackend migratingBackend phase1Overrides) $ \_ -> do @@ -245,10 +152,10 @@ testMigrationToPostgresJustProteus = do connectUsers [alice, bob, mel, mark, mia] otherMelConvs <- getAllConvIds mel 100 - -- Other convs - pooledReplicateConcurrentlyN_ 32 500 $ createTestConv mia miaTid [] - pooledReplicateConcurrentlyN_ 32 500 $ createTestConv alice aliceTid [mia] - pooledReplicateConcurrentlyN_ 32 500 $ createTestConv bob bobTid [mia] + -- Other convs which just exist + pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv mia miaTid [] + pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv alice aliceTid [mia] + pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv bob bobTid [mia] domainAConvs <- createTestConvs alice aliceTid mel mark [] domainBConvs <- createTestConvs bob bobTid mel mark [] @@ -262,7 +169,6 @@ testMigrationToPostgresJustProteus = do $ do let runPhase :: (HasCallStack) => Int -> App () runPhase phase = do - putStrLn $ "----------> Start phase: " <> show phase runCodensity (startDynamicBackend migratingBackend (phaseOverrides IntMap.! phase)) $ \_ -> do newDomainAConvs <- runPhaseOperations phase alice aliceTid domainAConvs mel mark newDomainBConvs <- runPhaseOperations phase bob bobTid domainBConvs mel mark @@ -292,16 +198,11 @@ testMigrationToPostgresJustProteus = do runPhase 4 runPhase 5 where - n = 1 - -- Creates n convs of these types: - -- 1. Convs that will exist unmodified during the test - -- 2. Convs that will kick mel in each phase - -- 3. Convs that will kick mark in each phase - -- 4. Convs that will be deleted in each phase - -- 5. 
Convs that will add mel in each phase + n = 20 + parallellism = 8 createTestConvs :: (HasCallStack) => Value -> String -> Value -> Value -> [Value] -> App TestConvList createTestConvs creatorC tid mel mark others = do - unmodifiedConvs <- replicateConcurrently n $ do + unmodifiedConvs <- pooledReplicateConcurrentlyN parallellism n $ do createTestConv creatorC tid (mel : mark : others) kickMelConvs <- forPhase $ createTestConv creatorC tid (mel : others) @@ -318,7 +219,7 @@ testMigrationToPostgresJustProteus = do forPhase :: App a -> App (IntMap [a]) forPhase action = - fmap IntMap.fromList . forConcurrently [1 .. 5] $ \phase -> do + fmap IntMap.fromList . pooledForConcurrentlyN parallellism [1 .. 5] $ \phase -> do convs <- replicateM n $ action pure (phase, convs) @@ -332,38 +233,26 @@ testMigrationToPostgresJustProteus = do runPhaseOperations :: (HasCallStack) => Int -> Value -> String -> TestConvList -> Value -> Value -> App [ConvId] runPhaseOperations phase convAdmin tid TestConvList {..} mel mark = do withWebSocket mel $ \melWS -> do - forConcurrently_ (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do + pooledForConcurrentlyN_ parallellism (IntMap.findWithDefault [] phase kickMelConvs) $ \convId -> do retry500Once (removeMember convAdmin convId mel) >>= assertSuccess void $ awaitNMatches n isConvLeaveNotif melWS - forConcurrently_ (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do + pooledForConcurrentlyN_ parallellism (IntMap.findWithDefault [] phase kickMarkConvs) $ \convId -> do retry500Once (removeMember convAdmin convId mark) >>= assertSuccess void $ awaitNMatches n isConvLeaveNotif melWS - forConcurrently_ (IntMap.findWithDefault [] phase delConvs) $ \convId -> do + pooledForConcurrentlyN_ parallellism (IntMap.findWithDefault [] phase delConvs) $ \convId -> do retry500Once (deleteTeamConversation tid convId convAdmin) >>= assertSuccess - forConcurrently_ (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do + pooledForConcurrentlyN_ parallellism (IntMap.findWithDefault [] phase addMelConvs) $ \convId -> do retry500Once (addMembers convAdmin convId (def {users = [mel]})) >>= assertSuccess void $ awaitNMatches n isConvDeleteNotif melWS - replicateConcurrently n + pooledReplicateConcurrentlyN parallellism n $ createTestConv convAdmin tid [mel] - waitForMigration :: (HasCallStack) => String -> App () - waitForMigration domainM = do - metrics <- - getMetrics domainM BackgroundWorker `bindResponse` \resp -> do - resp.status `shouldMatchInt` 200 - pure $ Text.decodeUtf8 resp.body - let (_, _, _, convFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$") - when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do - liftIO $ threadDelay 100_000 - waitForMigration domainM - -- Test Helpers data TestConvList = TestConvList @@ -396,3 +285,52 @@ instance Semigroup TestConvList where delConvs = IntMap.unionWith (<>) l1.delConvs l2.delConvs, addMelConvs = IntMap.unionWith (<>) l1.addMelConvs l2.addMelConvs } + +waitForMigration :: (HasCallStack) => String -> App () +waitForMigration domainM = do + metrics <- + getMetrics domainM BackgroundWorker `bindResponse` \resp -> do + resp.status `shouldMatchInt` 200 + pure $ Text.decodeUtf8 resp.body + let (_, _, _, convFinishedMatches) :: (Text, Text, 
Text, [Text]) = (metrics =~ Text.pack "^wire_local_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$")
+    let (_, _, _, userFinishedMatches) :: (Text, Text, Text, [Text]) = (metrics =~ Text.pack "^wire_user_remote_convs_migration_finished\\ ([0-9]+\\.[0-9]+)$")
+    when (convFinishedMatches /= [Text.pack "1.0"] || userFinishedMatches /= [Text.pack "1.0"]) $ do
+      liftIO $ threadDelay 100_000
+      waitForMigration domainM
+
+phase1Overrides, phase2Overrides, phase3Overrides, phase4Overrides, phase5Overrides :: ServiceOverrides
+phase1Overrides =
+  def
+    { galleyCfg = setField "postgresMigration.conversation" "cassandra",
+      backgroundWorkerCfg = setField "migrateConversations" False
+    }
+phase2Overrides =
+  def
+    { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+      backgroundWorkerCfg = setField "migrateConversations" False
+    }
+phase3Overrides =
+  def
+    { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+      backgroundWorkerCfg = setField "migrateConversations" True
+    }
+phase4Overrides =
+  def
+    { galleyCfg = setField "postgresMigration.conversation" "migration-to-postgresql",
+      backgroundWorkerCfg = setField "migrateConversations" False
+    }
+phase5Overrides =
+  def
+    { galleyCfg = setField "postgresMigration.conversation" "postgresql",
+      backgroundWorkerCfg = setField "migrateConversations" False
+    }
+
+phaseOverrides :: IntMap ServiceOverrides
+phaseOverrides =
+  IntMap.fromList
+    [ (1, phase1Overrides),
+      (2, phase2Overrides),
+      (3, phase3Overrides),
+      (4, phase4Overrides),
+      (5, phase5Overrides)
+    ]

From a6fe1432155e048a0bc6df45b6ef31e6383dba7e Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 29 Oct 2025 15:56:38 +0100
Subject: [PATCH 32/49] changelog

---
 .../0-release-notes/conversation-migration    | 52 +++++++++++++++++++
 changelog.d/2-features/conversation-migration |  1 +
 2 files changed, 53 insertions(+)
 create mode 100644 changelog.d/0-release-notes/conversation-migration
 create mode 100644 changelog.d/2-features/conversation-migration

diff --git a/changelog.d/0-release-notes/conversation-migration b/changelog.d/0-release-notes/conversation-migration
new file mode 100644
index 0000000000..adab845b2b
--- /dev/null
+++ b/changelog.d/0-release-notes/conversation-migration
@@ -0,0 +1,52 @@
+Starting this release, existing deployments can migrate the conversation data to
+PostgreSQL from Cassandra. This is necessary for channel search and management
+of channels from the team-management UI. It is highly recommended to take a
+backup of the Galley Cassandra before triggerring the migration.
+
+The migration needs to happen in 3 steps:
+
+1. Prepare wire-server for migration.
+
+   This step makes sure that wire-server keeps working as expected during the
+   migration. To do this, deploy wire-server with this config change:
+
+   ```yaml
+   galley:
+     config:
+       postgresqlMigration:
+         conversation: migration-to-postgresql
+   ```
+
+   This change should restart all the galley pods; any new conversations will
+   now be written to PostgreSQL.
+
+2. Trigger the migration and wait.
+
+   This step will actually carry out the migration. To do this, deploy
+   wire-server with this config change:
+
+   ```yaml
+   background-worker:
+     config:
+       migrateConversations: true
+   ```
+
+   This change should restart the background-worker pods. It is recommended to
+   watch the logs and wait for both of these metrics to report `1.0`:
+   `wire_local_convs_migration_finished` and `wire_user_remote_convs_migration_finished`.
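+
+   For example, you can poll the metrics endpoint directly (a sketch only: it
+   assumes the background-worker pod has `curl` available and serves its
+   Prometheus metrics on port 8080 under `/i/metrics`; adjust the namespace,
+   port, and path to your deployment):
+
+   ```sh
+   kubectl -n wire exec deploy/background-worker -- \
+     sh -c 'curl -s http://localhost:8080/i/metrics | grep migration_finished'
+   ```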
+   This can take a long time depending on the number of conversations in the DB.
+
+3. Configure wire-server to only use PostgreSQL for conversations.
+
+   This will be the configuration that must be used from now on for every new
+   release.
+
+   ```yaml
+   galley:
+     config:
+       postgresqlMigration:
+         conversation: postgresql
+   background-worker:
+     config:
+       migrateConversations: false
+   ```
diff --git a/changelog.d/2-features/conversation-migration b/changelog.d/2-features/conversation-migration
new file mode 100644
index 0000000000..c8dff09aac
--- /dev/null
+++ b/changelog.d/2-features/conversation-migration
@@ -0,0 +1 @@
+Support migration of all conversation data to PostgreSQL.
\ No newline at end of file

From 1e7b7dc8db6b1d5461400e58a538327c4515a94d Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 29 Oct 2025 16:48:27 +0100
Subject: [PATCH 33/49] Fix typos

---
 charts/background-worker/values.yaml                    |  2 +-
 libs/wire-subsystems/src/Wire/ConversationStore.hs      |  4 ++--
 .../src/Wire/ConversationStore/Cassandra.hs             | 10 +++++-----
 .../src/Wire/ConversationStore/Migration.hs             |  2 +-
 .../src/Wire/ConversationStore/Migration/Cleanup.hs     |  6 +++---
 .../src/Wire/ConversationStore/Postgres.hs              |  2 +-
 .../background-worker/src/Wire/BackgroundWorker.hs      |  4 ++--
 .../background-worker/src/Wire/MigrateConversations.hs  |  2 +-
 8 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/charts/background-worker/values.yaml b/charts/background-worker/values.yaml
index 9b232afc57..1ffa62944d 100644
--- a/charts/background-worker/values.yaml
+++ b/charts/background-worker/values.yaml
@@ -61,7 +61,7 @@ config:
   # Setting this to `true` will start conversation migration to postgresql.
   #
   # NOTE: It is very important that galley be configured to with
-  # `settings.postgresMigration.converastion` with `migration-to-postgresql`
+  # `settings.postgresMigration.conversation` with `migration-to-postgresql`
   # before setting this to `true`.
   migrateConversations: false

diff --git a/libs/wire-subsystems/src/Wire/ConversationStore.hs b/libs/wire-subsystems/src/Wire/ConversationStore.hs
index c88a3439b7..7e8c0c632f 100644
--- a/libs/wire-subsystems/src/Wire/ConversationStore.hs
+++ b/libs/wire-subsystems/src/Wire/ConversationStore.hs
@@ -76,7 +76,7 @@ data ConversationStore m a where
   GetConversationEpoch :: ConvId -> ConversationStore m (Maybe Epoch)
   GetConversations :: [ConvId] -> ConversationStore m [StoredConversation]
   GetLocalConversationIds :: UserId -> Maybe ConvId -> Range 1 1000 Int32 -> ConversationStore m (ResultSet ConvId)
-  GetRemoteConverastionIds :: UserId -> Maybe (Remote ConvId) -> Range 1 1000 Int32 -> ConversationStore m (ResultSet (Remote ConvId))
+  GetRemoteConversationIds :: UserId -> Maybe (Remote ConvId) -> Range 1 1000 Int32 -> ConversationStore m (ResultSet (Remote ConvId))
   GetConversationMetadata :: ConvId -> ConversationStore m (Maybe ConversationMetadata)
   GetGroupInfo :: ConvId -> ConversationStore m (Maybe GroupInfoData)
   -- FUTUREWORK: This is only relevant for Convs in Cassandra, we can delete it
@@ -174,7 +174,7 @@ getConversationIdsResultSet lusr maxIds mLastId = do
         }
 
     getRemotes :: Maybe (Remote ConvId) -> Range 1 1000 Int32 -> Sem r (ResultSet (Qualified ConvId))
-    getRemotes lastRemote maxRemotes = tUntagged <$$> getRemoteConverastionIds usr lastRemote maxRemotes
+    getRemotes lastRemote maxRemotes = tUntagged <$$> getRemoteConversationIds usr lastRemote maxRemotes
 
 -- | This function only exists because we use the 'MultiTablePage' type for the
 -- endpoint.
Since now the pagination is based on the qualified ids, we can diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index d14e0c7745..a04e19f1f4 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -884,8 +884,8 @@ interpretConversationStoreToCassandra client = interpret $ \case GetLocalConversationIds uid start maxIds -> do logEffect "ConversationStore.GetLocalConversationIds" embedClient client $ getLocalConvIds uid start maxIds - GetRemoteConverastionIds uid start maxIds -> do - logEffect "ConversationStore.GetRemoteConverastionIds" + GetRemoteConversationIds uid start maxIds -> do + logEffect "ConversationStore.GetRemoteConversationIds" embedClient client $ getRemoteConvIds uid start maxIds GetConversationMetadata cid -> do logEffect "ConversationStore.GetConversationMetadata" @@ -1123,12 +1123,12 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case then ResultSetTruncated else ResultSetComplete } - GetRemoteConverastionIds uid start maxIds -> do - logEffect "ConversationStore.GetRemoteConverastionIds" + GetRemoteConversationIds uid start maxIds -> do + logEffect "ConversationStore.GetRemoteConversationIds" withMigrationLockAndCleanup client LockShared (Right uid) $ do isUserInPostgres uid >>= \case False -> embedClient client $ getRemoteConvIds uid start maxIds - True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConverastionIds uid start maxIds + True -> interpretConversationStoreToPostgres $ ConvStore.getRemoteConversationIds uid start maxIds GetConversationMetadata cid -> do logEffect "ConversationStore.GetConversationMetadata" withMigrationLockAndCleanup client LockShared (Left cid) $ diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index 78a1866c89..187e03b682 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -407,7 +407,7 @@ getRemoteMemberStatusFromCassandra uid = withCassandra $ do where getAllRemoteConvIds :: [Remote ConvId] -> Maybe (Remote ConvId) -> Sem (ConversationStore ': r) [Remote ConvId] getAllRemoteConvIds acc mLastId = do - res <- getRemoteConverastionIds uid mLastId maxBound + res <- getRemoteConversationIds uid mLastId maxBound let newAcc = res.resultSetResult <> acc case (res.resultSetResult, res.resultSetType) of ([], _) -> pure newAcc diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs index aac51a697d..cbd624beac 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration/Cleanup.hs @@ -97,10 +97,10 @@ deleteRemoteMemberStatusesFromCassandra uid = do delete = "delete from user_remote_conv where user = ?" 
cleanupIfNecessary :: (PGConstraints r, Member (Input ClientState) r, Member ConversationStore r) => [Either ConvId UserId] -> Sem r () -cleanupIfNecessary = mapM_ (either cleanupConvIfNecessary cleanupUserIfNecesasry) +cleanupIfNecessary = mapM_ (either cleanupConvIfNecessary cleanupUserIfNecessary) -cleanupUserIfNecesasry :: (PGConstraints r, Member (Input ClientState) r) => UserId -> Sem r () -cleanupUserIfNecesasry uid = +cleanupUserIfNecessary :: (PGConstraints r, Member (Input ClientState) r) => UserId -> Sem r () +cleanupUserIfNecessary uid = whenM (isPendingDelete DeleteUser uid) $ do deleteRemoteMemberStatusesFromCassandra uid markDeletionComplete DeleteUser uid diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs index b94008682a..49ecac7064 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Postgres.hs @@ -53,7 +53,7 @@ interpretConversationStoreToPostgres = interpret $ \case GetConversationEpoch cid -> getConversationEpochImpl cid GetConversations cids -> getConversationsImpl cids GetLocalConversationIds uid lastConvId maxIds -> getLocalConversationIdsImpl uid lastConvId maxIds - GetRemoteConverastionIds uid lastConvId maxIds -> getRemoteConversationIdsImpl uid lastConvId maxIds + GetRemoteConversationIds uid lastConvId maxIds -> getRemoteConversationIdsImpl uid lastConvId maxIds GetConversationMetadata cid -> getConversationMetadataImpl cid GetGroupInfo cid -> getGroupInfoImpl cid IsConversationAlive cid -> isConversationAliveImpl cid diff --git a/services/background-worker/src/Wire/BackgroundWorker.hs b/services/background-worker/src/Wire/BackgroundWorker.hs index 9487e07724..2b17c9b6d0 100644 --- a/services/background-worker/src/Wire/BackgroundWorker.hs +++ b/services/background-worker/src/Wire/BackgroundWorker.hs @@ -16,7 +16,7 @@ import Wire.BackgroundWorker.Env import Wire.BackgroundWorker.Health qualified as Health import Wire.BackgroundWorker.Options import Wire.DeadUserNotificationWatcher qualified as DeadUserNotificationWatcher -import Wire.MigrateConversations qualified as MigrateConverastions +import Wire.MigrateConversations qualified as MigrateConversations run :: Opts -> IO () run opts = do @@ -36,7 +36,7 @@ run opts = do then runAppT env $ withNamedLogger "migrate-conversations" $ - MigrateConverastions.startWorker + MigrateConversations.startWorker else pure $ pure () let cleanup = void . 
runConcurrently $ diff --git a/services/background-worker/src/Wire/MigrateConversations.hs b/services/background-worker/src/Wire/MigrateConversations.hs index 72cb716298..82678e49f9 100644 --- a/services/background-worker/src/Wire/MigrateConversations.hs +++ b/services/background-worker/src/Wire/MigrateConversations.hs @@ -16,7 +16,7 @@ startWorker = do Log.info logger $ Log.msg (Log.val "starting conversation migration") convMigCounter <- register $ counter $ Prometheus.Info "wire_local_convs_migrated_to_pg" "Number of local conversations migrated to Postgresql" - convMigFinished <- register $ counter $ Prometheus.Info "wire_local_convs_migration_finished" "Whether the converastion migateion to Postgresql is finished" + convMigFinished <- register $ counter $ Prometheus.Info "wire_local_convs_migration_finished" "Whether the conversation migateion to Postgresql is finished" userMigCounter <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migrated_to_pg" "Number of users whose remote conversation membership data is migrated to Postgresql" userMigFinished <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migration_finished" "Whether the migration of remote conversation membership data to Postgresql is finished" From b078f34f5df146429b478f44b6dcd33b319d0dc2 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 30 Oct 2025 10:14:38 +0100 Subject: [PATCH 34/49] Fix more typos --- changelog.d/0-release-notes/conversation-migration | 2 +- integration/test/Test/Conversation/Migration.hs | 2 +- libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs | 2 +- services/background-worker/src/Wire/MigrateConversations.hs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/changelog.d/0-release-notes/conversation-migration b/changelog.d/0-release-notes/conversation-migration index adab845b2b..ed753c9e9c 100644 --- a/changelog.d/0-release-notes/conversation-migration +++ b/changelog.d/0-release-notes/conversation-migration @@ -1,7 +1,7 @@ Starting this release, existing deployments can migrate the conversation data to PostgreSQL from Cassandra. This is necessary for channel search and management of channels from the team-management UI. It is highly recommended to take a -backup of the Galley Cassandra before triggerring the migration. +backup of the Galley Cassandra before triggering the migration. The migration needs to happen in 3 steps: diff --git a/integration/test/Test/Conversation/Migration.hs b/integration/test/Test/Conversation/Migration.hs index 2e5b184f55..cf7e0b0511 100644 --- a/integration/test/Test/Conversation/Migration.hs +++ b/integration/test/Test/Conversation/Migration.hs @@ -8,7 +8,7 @@ -- The comments and variable names call these phases by number i.e. Phase1, Phase2, and so on. -- -- The tests are from the perspective of mel, a user on the dynamic backend, --- called backendM (migraing backend). There are also users called mark and mia +-- called backendM (migrating backend). There are also users called mark and mia -- on this backend. 
module Test.Conversation.Migration where diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs index a04e19f1f4..494013793d 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Cassandra.hs @@ -1275,7 +1275,7 @@ interpretConversationStoreToCassandraAndPostgres client = interpret $ \case -- Save users joining their first remote conv in postgres withMigrationLocksAndCleanup client LockShared (Seconds 2) (Right <$> uids) $ do filterUsersInPostgres uids >>= \pgUids -> do - let -- These are not in Postegres, but that doesn't mean they're in + let -- These are not in Postgres, but that doesn't mean they're in -- cassandra nonPgUids = filter (`notElem` pgUids) uids cassUids <- embedClient client $ haveRemoteConvs nonPgUids diff --git a/services/background-worker/src/Wire/MigrateConversations.hs b/services/background-worker/src/Wire/MigrateConversations.hs index 82678e49f9..37e3e82857 100644 --- a/services/background-worker/src/Wire/MigrateConversations.hs +++ b/services/background-worker/src/Wire/MigrateConversations.hs @@ -16,7 +16,7 @@ startWorker = do Log.info logger $ Log.msg (Log.val "starting conversation migration") convMigCounter <- register $ counter $ Prometheus.Info "wire_local_convs_migrated_to_pg" "Number of local conversations migrated to Postgresql" - convMigFinished <- register $ counter $ Prometheus.Info "wire_local_convs_migration_finished" "Whether the conversation migateion to Postgresql is finished" + convMigFinished <- register $ counter $ Prometheus.Info "wire_local_convs_migration_finished" "Whether the conversation migration to Postgresql is finished" userMigCounter <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migrated_to_pg" "Number of users whose remote conversation membership data is migrated to Postgresql" userMigFinished <- register $ counter $ Prometheus.Info "wire_user_remote_convs_migration_finished" "Whether the migration of remote conversation membership data to Postgresql is finished" From 2a053144264173be57d43f13e88ceb10b1a6a739 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 30 Oct 2025 10:16:23 +0100 Subject: [PATCH 35/49] ConversationStore.Migration: Declare type of `insertConv` --- .../src/Wire/ConversationStore/Migration.hs | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index 187e03b682..11a1c58b40 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -12,6 +12,7 @@ import Data.Domain import Data.Id import Data.IntMap qualified as IntMap import Data.Map qualified as Map +import Data.Misc import Data.Qualified import Data.Time import Data.Tuple.Extra @@ -34,6 +35,7 @@ import Polysemy.TinyLog import Prometheus qualified import System.Logger qualified as Log import Wire.API.Conversation hiding (Member) +import Wire.API.Conversation.CellsState import Wire.API.Conversation.Protocol import Wire.API.Conversation.Role import Wire.API.MLS.CipherSuite @@ -250,6 +252,29 @@ saveConvToPostgres allConvData = do storedConv = allConvData.conv -- In all these queries we do nothing on conflict because if the data is in -- Postgres it is considered fresher and data from Cassandra is ignored. 
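+    -- The explicit signature spells out the 19-column row type, so the
+    -- correspondence between the tuple and the INSERT statement's column
+    -- list can be audited at a glance.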
+    insertConv ::
+      Hasql.Statement
+        ( ConvId,
+          ConvType,
+          Maybe UserId,
+          Vector Access,
+          Imports.Set AccessRole,
+          Maybe Text,
+          Maybe TeamId,
+          Maybe Milliseconds,
+          Maybe ReceiptMode,
+          ProtocolTag,
+          Maybe GroupId,
+          Maybe Epoch,
+          Maybe UTCTime,
+          Maybe CipherSuiteTag,
+          Maybe GroupInfoData,
+          Maybe GroupConvType,
+          Maybe AddPermission,
+          CellsState,
+          Maybe ConvId
+        )
+        ()
     insertConv =
       lmapPG @_ @(_, _, _, Vector Int32, Vector Int32, _, _, _, _, _, _, _, _, _, _, _, _, _, _)
         [resultlessStatement|INSERT INTO conversation

From 58ed0ed5c0f184089b90d19e3cb91de3b933f21f Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 30 Oct 2025 10:24:27 +0100
Subject: [PATCH 36/49] Galley.Options: Fix error message

---
 services/galley/src/Galley/Options.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/galley/src/Galley/Options.hs b/services/galley/src/Galley/Options.hs
index 6c72a7dbc9..0aba1f8ca2 100644
--- a/services/galley/src/Galley/Options.hs
+++ b/services/galley/src/Galley/Options.hs
@@ -204,7 +204,7 @@ instance FromJSON StorageLocation where
       "cassandra" -> pure CassandraStorage
       "migration-to-postgresql" -> pure MigrationToPostgresql
       "postgresql" -> pure PostgresqlStorage
-      x -> fail $ "Invalid storage location: " <> Text.unpack x <> ". Valid options: cassandra, postgresql"
+      x -> fail $ "Invalid storage location: " <> Text.unpack x <> ". Valid options: cassandra, postgresql, migration-to-postgresql"

 data PostgresMigrationOpts = PostgresMigrationOpts
   { conversation :: StorageLocation

From 972168bdf01a1c3b3e970155ce79669c82cb67d3 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 30 Oct 2025 11:51:37 +0100
Subject: [PATCH 37/49] integration-setup: Configure cassandraGalley and
 postgres for background-worker

---
 hack/helm_vars/wire-server/values.yaml.gotmpl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/hack/helm_vars/wire-server/values.yaml.gotmpl b/hack/helm_vars/wire-server/values.yaml.gotmpl
index f675fc761f..71e0f74925 100644
--- a/hack/helm_vars/wire-server/values.yaml.gotmpl
+++ b/hack/helm_vars/wire-server/values.yaml.gotmpl
@@ -604,6 +604,11 @@ background-worker:
     pushBackoffMinWait: 1000 # 1ms
     pushBackoffMaxWait: 500000 # 0.5s
    remotesRefreshInterval: 1000000 # 1s
+    postgresql:
+      host: "postgresql"
+      port: "5432"
+      user: wire-server
+      dbname: wire-server
   cassandra:
     host: {{ .Values.cassandraHost }}
     replicaCount: 1
     {{- if .Values.useK8ssandraSSL.enabled }}
     tlsCaSecretRef:
       name: "cassandra-jks-keystore"
       key: "ca.crt"
     {{- end }}
+  cassandraGalley:
+    host: {{ .Values.cassandraHost }}
+    replicaCount: 1
+    {{- if .Values.useK8ssandraSSL.enabled }}
+    tlsCaSecretRef:
+      name: "cassandra-jks-keystore"
+      key: "ca.crt"
+    {{- end }}
   rabbitmq:
     port: 5671
     adminPort: 15671
     tlsCaSecretRef:
       name: "rabbitmq-certificate"
       key: "ca.crt"
 secrets:
+  pgPassword: "posty-the-gres"
   rabbitmq:
     username: {{ .Values.rabbitmqUsername }}
     password: {{ .Values.rabbitmqPassword }}

From eef622bdf1ed1ba770f457ebce836b4bbf26225b Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Thu, 30 Oct 2025 13:30:38 +0100
Subject: [PATCH 38/49] integration-setup: Correctly set max_connections for
 postgresql

---
 hack/helm_vars/postgresql/values.yaml.gotmpl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hack/helm_vars/postgresql/values.yaml.gotmpl b/hack/helm_vars/postgresql/values.yaml.gotmpl
index e1a20104c1..4e577911ad 100644
--- a/hack/helm_vars/postgresql/values.yaml.gotmpl
+++ b/hack/helm_vars/postgresql/values.yaml.gotmpl
@@ -8,5 +8,6 @@ resources:
limits: cpu: "1" postgresql: - configuration: | - max_connections = 500 + primary: + configuration: | + max_connections = 500 From 3b51ab0af419db8c7a672f52533e4d9e39dfc3fd Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 30 Oct 2025 15:46:48 +0100 Subject: [PATCH 39/49] galley-integration: Change assertions about pagination Because sometimes we don't have to get an empty page to realize we're at the end --- services/galley/test/integration/API.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/galley/test/integration/API.hs b/services/galley/test/integration/API.hs index 94870c94c9..1fb7e01acf 100644 --- a/services/galley/test/integration/API.hs +++ b/services/galley/test/integration/API.hs @@ -1648,7 +1648,7 @@ paginateConvListIds = do -- 1 Proteus self conv + 1 MLS self conv + 2 convs with bob and eve + 196 -- local convs + 25 convs on chad.example.com + 31 on dee.example = 256 convs. -- Getting them 16 at a time should get all them in 16 times. - foldM_ (getChunkedConvs 16 0 alice) Nothing [16, 15 .. 0 :: Int] + foldM_ (getChunkedConvs 16 16 alice) Nothing [15, 14 .. 0 :: Int] -- This test ensures to setup conversations so that a page would end exactly -- when local convs are exhausted and then exactly when another remote domain's @@ -1710,7 +1710,7 @@ paginateConvListIdsPageEndingAtLocalsAndDomain = do } void $ runFedClient @"on-conversation-updated" fedGalleyClient deeDomain cu - foldM_ (getChunkedConvs 16 0 alice) Nothing [4, 3, 2, 1, 0 :: Int] + foldM_ (getChunkedConvs 16 16 alice) Nothing [3, 2, 1, 0 :: Int] -- | Gets chunked conversation ids given size of each chunk, size of the last -- chunk, requesting user and @n@ which represents how many chunks are remaining From 192ce2079cd6d2aab9de3d4538a6f732741759a8 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 30 Oct 2025 15:51:41 +0100 Subject: [PATCH 40/49] integration-setup: More CPU/memory for postgresql Also stop any throttling, OOMKilling --- hack/helm_vars/postgresql/values.yaml.gotmpl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/hack/helm_vars/postgresql/values.yaml.gotmpl b/hack/helm_vars/postgresql/values.yaml.gotmpl index 4e577911ad..c563b876d8 100644 --- a/hack/helm_vars/postgresql/values.yaml.gotmpl +++ b/hack/helm_vars/postgresql/values.yaml.gotmpl @@ -2,12 +2,11 @@ auth: postgresPassword: "posty-the-gres" username: wire-server password: "posty-the-gres" -resources: - requests: - cpu: "1" - limits: - cpu: "1" -postgresql: - primary: - configuration: | - max_connections = 500 +primary: + resources: + requests: + cpu: 1 + memory: 2Gi + limits: {} + extendedConfiguration: | + max_connections = 500 From 0cb0040cc02e37771a5e2b47f1b8ba0fc4c790be Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Mon, 3 Nov 2025 09:53:40 +0100 Subject: [PATCH 41/49] integration-setup: Allow even more connections to postgresql --- hack/helm_vars/postgresql/values.yaml.gotmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/helm_vars/postgresql/values.yaml.gotmpl b/hack/helm_vars/postgresql/values.yaml.gotmpl index c563b876d8..f6a9456280 100644 --- a/hack/helm_vars/postgresql/values.yaml.gotmpl +++ b/hack/helm_vars/postgresql/values.yaml.gotmpl @@ -9,4 +9,4 @@ primary: memory: 2Gi limits: {} extendedConfiguration: | - max_connections = 500 + max_connections = 1500 From 8e886a7b65c7923209401cda70f221978876c644 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 4 Nov 2025 10:21:15 +0100 Subject: [PATCH 42/49] ConversationStore.Migration: 
Add name of the migration to all logs --- .../src/Wire/ConversationStore/Migration.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs index 11a1c58b40..be6980c67c 100644 --- a/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs +++ b/libs/wire-subsystems/src/Wire/ConversationStore/Migration.hs @@ -52,6 +52,7 @@ import Wire.ConversationStore.Migration.Cleanup import Wire.ConversationStore.Migration.Types import Wire.ConversationStore.MigrationLock import Wire.Postgres +import Wire.Sem.Logger (mapLogger) import Wire.Sem.Logger.TinyLog (loggerToTinyLog) import Wire.Sem.Paging.Cassandra import Wire.StoredConversation @@ -92,22 +93,22 @@ migrationLoop cassClient pgPool logger name migFinished migration = do runMigration :: IO Int runMigration = fmap fst - . interpreter cassClient pgPool logger + . interpreter cassClient pgPool logger name $ runConduit migration -interpreter :: ClientState -> Hasql.Pool -> Log.Logger -> Sem EffectStack a -> IO (Int, a) -interpreter cassClient pgPool logger = +interpreter :: ClientState -> Hasql.Pool -> Log.Logger -> ByteString -> Sem EffectStack a -> IO (Int, a) +interpreter cassClient pgPool logger name = runFinal . embedToFinal . loggerToTinyLog logger + . mapLogger (Log.field "migration" name .) + . raiseUnder . interpretRace . asyncToIOFinal . runInputConst pgPool . runInputConst cassClient . runState 0 --- * Paginated Migration - pageSize :: Int32 pageSize = 10000 From fb5cad7cd6367e1cfbb279f37e525fc2c0f2a04f Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 4 Nov 2025 10:32:41 +0100 Subject: [PATCH 43/49] charts/integration: Mount background-worker secrets --- charts/integration/templates/integration-integration.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/charts/integration/templates/integration-integration.yaml b/charts/integration/templates/integration-integration.yaml index 3d56b93158..f59475280e 100644 --- a/charts/integration/templates/integration-integration.yaml +++ b/charts/integration/templates/integration-integration.yaml @@ -60,10 +60,15 @@ spec: - name: "federator-ca" configMap: name: "federator-ca" + - name: "background-worker-config" configMap: name: "background-worker" + - name: "background-worker-secrets" + secret: + secretName: "background-worker" + - name: "stern-config" configMap: name: "backoffice" @@ -250,6 +255,9 @@ spec: - name: background-worker-config mountPath: /etc/wire/background-worker/conf + - name: background-worker-secrets + mountPath: /etc/wire/background-worker/secrets + - name: stern-config mountPath: /etc/wire/stern/conf From 0f3c1a2d1d6153fc6b1cc56e34a58f21b9ce6cec Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 4 Nov 2025 10:53:39 +0100 Subject: [PATCH 44/49] integration/Test.Conv.Migration: Create fewer other convs so tests run faster --- integration/test/Test/Conversation/Migration.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration/test/Test/Conversation/Migration.hs b/integration/test/Test/Conversation/Migration.hs index cf7e0b0511..c694e1b4fe 100644 --- a/integration/test/Test/Conversation/Migration.hs +++ b/integration/test/Test/Conversation/Migration.hs @@ -153,9 +153,9 @@ testMigrationToPostgresProteus = do otherMelConvs <- getAllConvIds mel 100 -- Other convs which just exist - pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv mia miaTid [] - 
pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv alice aliceTid [mia] - pooledReplicateConcurrentlyN_ parallellism 500 $ createTestConv bob bobTid [mia] + pooledReplicateConcurrentlyN_ parallellism 100 $ createTestConv mia miaTid [] + pooledReplicateConcurrentlyN_ parallellism 100 $ createTestConv alice aliceTid [mia] + pooledReplicateConcurrentlyN_ parallellism 100 $ createTestConv bob bobTid [mia] domainAConvs <- createTestConvs alice aliceTid mel mark [] domainBConvs <- createTestConvs bob bobTid mel mark [] From 3955081830ca40b55128f2e1b4a00261c7066f45 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 4 Nov 2025 15:34:52 +0100 Subject: [PATCH 45/49] integration: Set cassandra keyspace for dynamic cannons --- integration/test/Testlib/ModService.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration/test/Testlib/ModService.hs b/integration/test/Testlib/ModService.hs index c7d11bdb0f..84d9459a3c 100644 --- a/integration/test/Testlib/ModService.hs +++ b/integration/test/Testlib/ModService.hs @@ -221,7 +221,9 @@ startDynamicBackend resource beOverrides = do gundeckCfg = setField "cassandra.keyspace" resource.berGundeckKeyspace, backgroundWorkerCfg = setField "cassandra.keyspace" resource.berGundeckKeyspace - >=> setField "cassandraGalley.keyspace" resource.berGalleyKeyspace + >=> setField "cassandraGalley.keyspace" resource.berGalleyKeyspace, + cannonCfg = + setField "cassandra.keyspace" resource.berGundeckKeyspace } setPgDb :: ServiceOverrides setPgDb = From c8a2a5557b0d645d8c620241f9243084440e06f2 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Tue, 4 Nov 2025 16:13:06 +0100 Subject: [PATCH 46/49] integration-setup: Allow tests to run for 5 more mins --- hack/bin/integration-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/bin/integration-test.sh b/hack/bin/integration-test.sh index d1f1c02931..6620005c0e 100755 --- a/hack/bin/integration-test.sh +++ b/hack/bin/integration-test.sh @@ -63,7 +63,7 @@ kubectl -n wire-federation-v1 get secrets rabbitmq -ojson | jq 'del(.metadata.na mkdir -p ~/.parallel && touch ~/.parallel/will-cite printf '%s\n' "${tests[@]}" | parallel echo "Running helm tests for {}..." printf '%s\n' "${tests[@]}" | parallel -P "${HELM_PARALLELISM}" \ - helm test -n "${NAMESPACE}" "${CHART}" --timeout 900s --filter name="${CHART}-{}-integration" '> logs-{};' \ + helm test -n "${NAMESPACE}" "${CHART}" --timeout 1200s --filter name="${CHART}-{}-integration" '> logs-{};' \ echo '$? > stat-{};' \ echo "==== Done testing {}. 
====" '};' \
  kubectl -n "${NAMESPACE}" logs "${CHART}-{}-integration" '>> logs-{};'

From 699cad2cebf78a943897e74eddd953ac164dd9be Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 5 Nov 2025 09:47:58 +0100
Subject: [PATCH 47/49] galley-integration: Produce better error message

---
 services/galley/test/integration/API.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/galley/test/integration/API.hs b/services/galley/test/integration/API.hs
index 1fb7e01acf..bcce0b772f 100644
--- a/services/galley/test/integration/API.hs
+++ b/services/galley/test/integration/API.hs
@@ -1766,7 +1766,7 @@ getConvsPagingOk = do
        =<< getConvs u ids1<!
Date: Wed, 5 Nov 2025 09:51:00 +0100
Subject: [PATCH 48/49] docs: Add steps for migrations

---
 .../src/developer/reference/config-options.md | 60 ++++++++++++++++++-
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/docs/src/developer/reference/config-options.md b/docs/src/developer/reference/config-options.md
index 5ca0820dca..9b96215b89 100644
--- a/docs/src/developer/reference/config-options.md
+++ b/docs/src/developer/reference/config-options.md
@@ -1683,10 +1683,9 @@ used as `password` field.
 
 ### Using PostgreSQL for storing conversation data
 
-This is currently not the default and is experimental.
-The migration path from Cassandra is yet to be programmed.
+#### New installations
 
-However, new installations can use this by configuring the wire-server helm
+New installations can use this by configuring the wire-server helm
 chart like this:
 
 ```yaml
@@ -1696,6 +1695,61 @@ galley:
       conversation: postgresql
 ```
 
+#### Migration for existing installations
+
+Existing installations should migrate the conversation data to PostgreSQL from
+Cassandra. This is necessary for channel search and management of channels from
+the team-management UI. It is highly recommended to take a backup of the Galley
+Cassandra before triggering the migration.
+
+The migration needs to happen in 3 steps:
+
+1. Prepare wire-server for migration.
+
+   This step makes sure that wire-server keeps working as expected during the
+   migration. To do this, deploy wire-server with this config change:
+
+   ```yaml
+   galley:
+     config:
+       postgresqlMigration:
+         conversation: migration-to-postgresql
+   ```
+
+   This change should restart all the galley pods; any new conversations will
+   now be written to PostgreSQL.
+
+2. Trigger the migration and wait.
+
+   This step will actually carry out the migration. To do this, deploy
+   wire-server with this config change:
+
+   ```yaml
+   background-worker:
+     config:
+       migrateConversations: true
+   ```
+
+   This change should restart the background-worker pods. It is recommended to
+   watch the logs and wait for both of these metrics to report `1.0`:
+   `wire_local_convs_migration_finished` and `wire_user_remote_convs_migration_finished`.
+   This can take a long time depending on the number of conversations in the DB.
+
+3. Configure wire-server to only use PostgreSQL for conversations.
+
+   This will be the configuration that must be used from now on for every new
+   release.
+
+   ```yaml
+   galley:
+     config:
+       postgresqlMigration:
+         conversation: postgresql
+   background-worker:
+     config:
+       migrateConversations: false
+   ```

From 4a31f48ca428eed25d771bc678223238e575d496 Mon Sep 17 00:00:00 2001
From: Akshay Mankar
Date: Wed, 5 Nov 2025 11:13:51 +0100
Subject: [PATCH 49/49] galley-integration: Relax the requirement that getConvs
 returns convs in the same order as IDs

---
 services/galley/test/integration/API.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/galley/test/integration/API.hs b/services/galley/test/integration/API.hs
index bcce0b772f..8e38c31ce1 100644
--- a/services/galley/test/integration/API.hs
+++ b/services/galley/test/integration/API.hs
@@ -1766,7 +1766,7 @@ getConvsPagingOk = do
        =<< getConvs u ids1