mattermost-community-enterp.../channels/app/shared_channel_membership_sync_self_referential_test.go
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.

package app

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"slices"
"sync"
"sync/atomic"
"testing"
"time"

"github.com/mattermost/mattermost/server/public/model"
"github.com/mattermost/mattermost/server/v8/platform/services/remotecluster"
"github.com/mattermost/mattermost/server/v8/platform/services/sharedchannel"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestSharedChannelMembershipSyncSelfReferential(t *testing.T) {
th := setupSharedChannels(t).InitBasic()
defer th.TearDown()
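// The store is used directly below to create remote clusters and shared channel remotes without going through the invite flow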
ss := th.App.Srv().Store()
// Get the shared channel service and cast to concrete type to access SyncAllChannelMembers
scsInterface := th.App.Srv().GetSharedChannelSyncService()
service, ok := scsInterface.(*sharedchannel.Service)
require.True(t, ok, "Expected sharedchannel.Service concrete type")
// Force the service to be active
err := service.Start()
require.NoError(t, err)
// Wait for service to be fully active
require.Eventually(t, func() bool {
return service.Active()
}, 5*time.Second, 100*time.Millisecond, "SharedChannelService should be active")
// Also ensure the remote cluster service is running so callbacks work
rcService := th.App.Srv().GetRemoteClusterService()
if rcService != nil {
_ = rcService.Start()
// Force the service to be active in test environment
if rc, ok := rcService.(*remotecluster.Service); ok {
rc.SetActive(true)
}
// Wait for remote cluster service to be active
require.Eventually(t, func() bool {
return rcService.Active()
}, 5*time.Second, 100*time.Millisecond, "RemoteClusterService should be active")
}
t.Run("Test 1: Automatic sync on membership changes", func(t *testing.T) {
// This test verifies that membership sync happens automatically when users are added or removed from a shared channel.
// The sync is triggered by HandleMembershipChange which is called automatically by AddUserToChannel and RemoveUserFromChannel.
// The test ensures that sync messages are sent asynchronously after a minimum delay for both add and remove operations.
EnsureCleanState(t, th, ss)
// Track sync messages received
var syncMessageCount int32
var syncHandler *SelfReferentialSyncHandler
// Create a test HTTP server that acts as the "remote" cluster
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
atomic.AddInt32(&syncMessageCount, 1)
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
} else {
writeOKResponse(w)
}
}))
defer testServer.Close()
// Create a shared channel
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "auto-sync-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create a self-referential remote cluster
selfCluster := &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Initialize sync handler
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
// Share the channel with our self-referential cluster
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Refresh the channel object to get the updated Shared field
channel, appErr := th.App.GetChannel(th.Context, channel.Id)
require.Nil(t, appErr)
require.True(t, channel.IsShared(), "Channel should be marked as shared")
// Create a user and add to team
user := th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
// Add user to channel - this triggers HandleMembershipChange automatically
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
// Wait for the user to be locally added before checking for sync
require.Eventually(t, func() bool {
_, memberErr := ss.Channel().GetMember(th.Context, channel.Id, user.Id)
return memberErr == nil
}, 5*time.Second, 100*time.Millisecond, "User should be locally added to channel")
// Wait for async sync with more generous timeout (minimum delay is 2 seconds + async task processing)
require.Eventually(t, func() bool {
count := atomic.LoadInt32(&syncMessageCount)
return count > 0
}, 15*time.Second, 200*time.Millisecond, "Should have received at least one sync message via automatic sync")
// Wait for async task queue to be processed and sync to complete
require.Eventually(t, func() bool {
return !service.HasPendingTasksForTesting()
}, 10*time.Second, 200*time.Millisecond, "All async sync tasks should be completed")
// Verify the user is a member at the receiver end
member, memberErr := ss.Channel().GetMember(th.Context, channel.Id, user.Id)
require.NoError(t, memberErr)
require.Equal(t, user.Id, member.UserId)
// Capture the current sync count once background tasks have settled; the removal below must produce additional messages
var initialCount int32
require.Eventually(t, func() bool {
initialCount = atomic.LoadInt32(&syncMessageCount)
return !service.HasPendingTasksForTesting()
}, 5*time.Second, 100*time.Millisecond, "Background tasks should settle before removal test")
// Remove user from channel - this should also trigger automatic sync
appErr = th.App.RemoveUserFromChannel(th.Context, user.Id, th.BasicUser.Id, channel)
require.Nil(t, appErr)
// Wait for removal sync with increased timeout
require.Eventually(t, func() bool {
count := atomic.LoadInt32(&syncMessageCount)
return count > initialCount
}, 20*time.Second, 200*time.Millisecond, "Should have received sync message for user removal")
// Wait for async task queue to be processed after removal
require.Eventually(t, func() bool {
return !service.HasPendingTasksForTesting()
}, 15*time.Second, 200*time.Millisecond, "All async removal tasks should be completed")
// Wait for the removal to be processed with extended timeout
require.Eventually(t, func() bool {
_, err = ss.Channel().GetMember(th.Context, channel.Id, user.Id)
return err != nil
}, 30*time.Second, 300*time.Millisecond, "User should not be a member after removal")
})
t.Run("Test 2: Batch membership sync with user type filtering", func(t *testing.T) {
EnsureCleanState(t, th, ss)
// This test verifies batch sync of multiple members and proper filtering of user types
var batchedUserIDs [][]string
var mu sync.Mutex
var selfCluster *model.RemoteCluster
var syncCompleted atomic.Bool
// Placeholder for sync handler - will be created after selfCluster is initialized
var syncHandler *SelfReferentialSyncHandler
// Create test HTTP server
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
} else {
writeOKResponse(w)
}
}))
defer testServer.Close()
// Create channel but DON'T share it yet
channel := th.CreateChannel(th.Context, th.BasicTeam)
// Create self-referential remote cluster
selfCluster = &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-batch",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
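// Use the service's configured member sync batch size so the expected batch count computed below matches the implementation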
batchSize := service.GetMemberSyncBatchSize()
// First, add multiple users to the channel BEFORE sharing
// Include different user types to test filtering
numRegularUsers := (batchSize * 2) + 5 // enough users to span more than two full batches
regularUserIDs := make([]string, numRegularUsers)
for i := range numRegularUsers {
user := th.CreateUser()
regularUserIDs[i] = user.Id
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
}
// Add users that should be synced (including bots and system admins)
// Add a bot
bot := th.CreateBot()
botUser, appErr := th.App.GetUser(bot.UserId)
require.Nil(t, appErr)
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, bot.UserId, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, botUser, channel, false)
require.Nil(t, appErr)
// Add a system admin
systemAdmin := th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, systemAdmin.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.UpdateUserRoles(th.Context, systemAdmin.Id, model.SystemAdminRoleId+" "+model.SystemUserRoleId, false)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, systemAdmin, channel, false)
require.Nil(t, appErr)
// Add a guest user (should be synced)
guest := th.CreateGuest()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, guest.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, guest, channel, false)
require.Nil(t, appErr)
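// Share the channel only after all members exist so the batch sync below picks everyone up at once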
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "batch-sync-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, err = th.App.ShareChannel(th.Context, sc)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Initialize sync handler with callbacks
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
// Track when all expected batches are received
// Should include regular users + guest + BasicUser + bot + system admin
expectedTotal := numRegularUsers + 1 + 1 + 1 + 1 // regular users + guest + BasicUser + bot + system admin
expectedBatches := (expectedTotal + batchSize - 1) / batchSize
syncHandler.OnBatchSync = func(userIds []string, messageNumber int32) {
mu.Lock()
batchedUserIDs = append(batchedUserIDs, userIds)
// Check if we've received all expected batches
if len(batchedUserIDs) >= expectedBatches {
syncCompleted.Store(true)
}
mu.Unlock()
}
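// Trigger a manual full-membership sync to the self-referential remote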
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for batch messages to be received with more robust checking
require.Eventually(t, func() bool {
return syncCompleted.Load()
}, 30*time.Second, 200*time.Millisecond, fmt.Sprintf("Should receive %d batch sync messages", expectedBatches))
// Wait for all async processing to complete
require.Eventually(t, func() bool {
return !service.HasPendingTasksForTesting()
}, 15*time.Second, 200*time.Millisecond, "All async tasks should be completed")
// Verify cursor was updated - wait longer for batch processing
require.Eventually(t, func() bool {
updatedScr, getErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
return getErr == nil && updatedScr.LastMembersSyncAt > 0
}, 20*time.Second, 200*time.Millisecond, "Cursor should be updated after batch sync")
// Verify sync messages were sent
count := syncHandler.GetSyncMessageCount()
assert.Greater(t, count, int32(0), "Should have received sync messages")
// Check batch contents with proper locking
mu.Lock()
totalSynced := 0
allSyncedUserIDs := make(map[string]bool)
actualBatches := len(batchedUserIDs)
for _, batch := range batchedUserIDs {
totalSynced += len(batch)
for _, userID := range batch {
allSyncedUserIDs[userID] = true
}
}
mu.Unlock()
// Verify exact batch count
assert.Equal(t, expectedBatches, actualBatches, fmt.Sprintf("Should have exactly %d batches with batch size %d", expectedBatches, batchSize))
// Verify total synced users
assert.Equal(t, expectedTotal, totalSynced, "All users including bots and system admins should be synced")
// Verify that bot and system admin WERE synced
assert.Contains(t, allSyncedUserIDs, bot.UserId, "Bot should be synced")
assert.Contains(t, allSyncedUserIDs, systemAdmin.Id, "System admin should be synced")
// Verify that guest WAS synced
assert.Contains(t, allSyncedUserIDs, guest.Id, "Guest user should be synced")
// Verify that regular users were synced
for _, regularUserID := range regularUserIDs {
assert.Contains(t, allSyncedUserIDs, regularUserID, "Regular user should be synced")
}
})
t.Run("Test 3: Cursor management", func(t *testing.T) {
// This test verifies incremental sync using cursor timestamps.
// The cursor (LastMembersSyncAt) tracks the last sync time to ensure only new/modified
// memberships are synced on subsequent calls, avoiding redundant syncs.
// We test that:
// 1. Initial sync includes all existing members and updates the cursor
// 2. Subsequent syncs only include members added after the cursor timestamp
// 3. Previously synced members are not re-synced (incremental behavior)
EnsureCleanState(t, th, ss)
var syncedInFirstCall []string
var syncedInSecondCall []string
var mu sync.Mutex
var selfCluster *model.RemoteCluster
var svc *sharedchannel.Service
var syncHandler *SelfReferentialSyncHandler
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
} else {
writeOKResponse(w)
}
}))
defer testServer.Close()
// Create and share channel
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "cursor-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create self-referential remote cluster
selfCluster = &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-cursor",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
scr, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Refresh the channel object to get the updated Shared field
channel, appErr := th.App.GetChannel(th.Context, channel.Id)
require.Nil(t, appErr)
require.True(t, channel.IsShared(), "Channel should be marked as shared")
// Add first batch of users
user1 := th.CreateUser()
user2 := th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user1.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user1, channel, false)
require.Nil(t, appErr)
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user2.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user2, channel, false)
require.Nil(t, appErr)
// Get the shared channel service
scsInterface := th.App.Srv().GetSharedChannelSyncService()
var ok bool
svc, ok = scsInterface.(*sharedchannel.Service)
require.True(t, ok, "Expected sharedchannel.Service concrete type")
// Force the service to be active
err = svc.Start()
require.NoError(t, err)
// Create sync handler with callbacks
syncHandler = NewSelfReferentialSyncHandler(t, svc, selfCluster)
syncHandler.OnIndividualSync = func(userId string, messageNumber int32) {
mu.Lock()
defer mu.Unlock()
if messageNumber <= 2 { // First sync
syncedInFirstCall = append(syncedInFirstCall, userId)
} else { // Second sync
syncedInSecondCall = append(syncedInSecondCall, userId)
}
}
// First sync
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for first sync to complete
require.Eventually(t, func() bool {
mu.Lock()
defer mu.Unlock()
return len(syncedInFirstCall) >= 2
}, 10*time.Second, 100*time.Millisecond, "First sync should complete with initial users")
// Wait for cursor to be updated after first sync
var firstSyncCursor int64
require.Eventually(t, func() bool {
scr, err = ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
if err != nil {
return false
}
firstSyncCursor = scr.LastMembersSyncAt
return firstSyncCursor > 0
}, 10*time.Second, 100*time.Millisecond, "Cursor should be updated after first sync")
// Add another user after cursor update
user3 := th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user3.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user3, channel, false)
require.Nil(t, appErr)
// Second sync - should only sync user3
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for second sync to complete
require.Eventually(t, func() bool {
mu.Lock()
defer mu.Unlock()
return len(syncedInSecondCall) >= 1
}, 10*time.Second, 100*time.Millisecond, "Second sync should complete with new user")
// Wait for cursor to be updated after second sync
var secondSyncCursor int64
require.Eventually(t, func() bool {
scr, err = ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
if err != nil {
return false
}
secondSyncCursor = scr.LastMembersSyncAt
return secondSyncCursor > firstSyncCursor
}, 10*time.Second, 100*time.Millisecond, "Cursor should advance after second sync")
// Verify incremental sync
assert.GreaterOrEqual(t, len(syncedInFirstCall), 2, "First sync should include initial users")
assert.Contains(t, syncedInSecondCall, user3.Id, "Second sync should include only new user")
assert.NotContains(t, syncedInSecondCall, user1.Id, "Second sync should not re-sync existing users")
assert.NotContains(t, syncedInSecondCall, user2.Id, "Second sync should not re-sync existing users")
})
t.Run("Test 4: Sync failure and recovery", func(t *testing.T) {
t.Skip("MM-64687")
// This test verifies that membership sync handles remote server failures gracefully
// and successfully syncs members once the remote server recovers.
// We test that:
// 1. Sync attempts are made even when the remote server returns errors
// 2. No members are synced during failure mode
// 3. Once the server recovers, sync completes successfully
EnsureCleanState(t, th, ss)
var syncAttempts int32
var failureMode atomic.Bool
failureMode.Store(true)
var successfulSyncsMu sync.Mutex
var successfulSyncs []string
var selfCluster *model.RemoteCluster
var syncHandler *SelfReferentialSyncHandler
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/v4/remotecluster/msg" {
atomic.AddInt32(&syncAttempts, 1)
if failureMode.Load() {
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(`{"error":"simulated failure"}`))
return
}
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
} else {
writeOKResponse(w)
}
} else {
writeOKResponse(w)
}
}))
defer testServer.Close()
// Create and share channel
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "failure-recovery-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create self-referential remote cluster
selfCluster = &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-failure",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Initialize sync handler with callbacks
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
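// Record every user the remote reports as synced; guarded by a mutex because the HTTP handler runs on a separate goroutine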
syncHandler.OnBatchSync = func(userIds []string, messageNumber int32) {
successfulSyncsMu.Lock()
successfulSyncs = append(successfulSyncs, userIds...)
successfulSyncsMu.Unlock()
}
syncHandler.OnIndividualSync = func(userId string, messageNumber int32) {
successfulSyncsMu.Lock()
successfulSyncs = append(successfulSyncs, userId)
successfulSyncsMu.Unlock()
}
// Add a user to sync
testUser := th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, testUser.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, testUser, channel, false)
require.Nil(t, appErr)
// First sync attempt - will fail
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for first sync attempt with more robust checking
require.Eventually(t, func() bool {
attempts := atomic.LoadInt32(&syncAttempts)
return attempts > 0
}, 15*time.Second, 100*time.Millisecond, "Should have attempted sync during failure mode")
initialAttempts := atomic.LoadInt32(&syncAttempts)
assert.Greater(t, initialAttempts, int32(0), "Should have attempted sync")
successfulSyncsMu.Lock()
assert.Empty(t, successfulSyncs, "No successful syncs during failure mode")
successfulSyncsMu.Unlock()
// Recover from failure mode
failureMode.Store(false)
// Second sync attempt - should succeed
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for successful sync with more robust checking
require.Eventually(t, func() bool {
successfulSyncsMu.Lock()
defer successfulSyncsMu.Unlock()
return slices.Contains(successfulSyncs, testUser.Id)
}, 15*time.Second, 100*time.Millisecond, "Should have successful sync after recovery")
// Verify recovery
finalAttempts := atomic.LoadInt32(&syncAttempts)
assert.Greater(t, finalAttempts, initialAttempts, "Should have retried after recovery")
})
t.Run("Test 5: Manual sync with cursor management", func(t *testing.T) {
// This test verifies manual sync using SyncAllChannelMembers with complete cursor management:
// 1. Initial sync of 10 users with cursor tracking
// 2. Mixed operations: remove 3 users and add 5 new users
// 3. Verifies all operations are properly synced and cursor is updated correctly
// 4. Validates that the LastMembersSyncAt cursor advances after each sync operation
EnsureCleanState(t, th, ss)
var totalSyncMessages int32
var addOperations int32
var removeOperations int32
var selfCluster *model.RemoteCluster
// Create sync handler
syncHandler := &SelfReferentialSyncHandler{
t: t,
service: service,
selfCluster: nil, // Will be set later
syncMessageCount: &totalSyncMessages,
}
// Create test HTTP server
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/v4/remotecluster/msg" {
bodyBytes, _ := io.ReadAll(r.Body)
// Count add and remove operations
var frame model.RemoteClusterFrame
if json.Unmarshal(bodyBytes, &frame) == nil {
var syncMsg model.SyncMsg
if json.Unmarshal(frame.Msg.Payload, &syncMsg) == nil && frame.Msg.Topic == "sharedchannel_membership" {
// Count membership changes from the unified field
for _, change := range syncMsg.MembershipChanges {
if change.IsAdd {
atomic.AddInt32(&addOperations, 1)
} else {
atomic.AddInt32(&removeOperations, 1)
}
}
}
}
// Restore body and handle with sync handler
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
if selfCluster != nil {
syncHandler.selfCluster = selfCluster
syncHandler.HandleRequest(w, r)
return
}
}
writeOKResponse(w)
}))
defer testServer.Close()
// Create and share channel
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "full-sync-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create self-referential remote cluster
selfCluster = &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-full",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
syncHandler.selfCluster = selfCluster
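// Share the channel with the self-referential cluster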
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Refresh the channel object to get the updated Shared field
channel, appErr := th.App.GetChannel(th.Context, channel.Id)
require.Nil(t, appErr)
require.True(t, channel.IsShared(), "Channel should be marked as shared")
// Phase 1: Add initial batch of users
initialUsers := make([]*model.User, 10)
for i := range 10 {
initialUsers[i] = th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, initialUsers[i].Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, initialUsers[i], channel, false)
require.Nil(t, appErr)
}
// Get initial cursor value
initialScr, scrErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
require.NoError(t, scrErr)
initialCursor := initialScr.LastMembersSyncAt
// Initial sync
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for initial sync to complete
require.Eventually(t, func() bool {
return atomic.LoadInt32(&addOperations) >= 10
}, 10*time.Second, 100*time.Millisecond, "Should sync all initial users")
initialAdds := atomic.LoadInt32(&addOperations)
assert.GreaterOrEqual(t, initialAdds, int32(10), "Should sync all initial users")
// Verify cursor was updated after initial sync
require.Eventually(t, func() bool {
updatedScr, getErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
return getErr == nil && updatedScr.LastMembersSyncAt > initialCursor
}, 10*time.Second, 100*time.Millisecond, "Cursor should be updated after initial sync")
// Get cursor after initial sync
afterInitialScr, getErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
require.NoError(t, getErr)
cursorAfterInitial := afterInitialScr.LastMembersSyncAt
// Phase 2: Mixed operations - remove some, add new ones
// Remove first 3 users
for i := range 3 {
appErr := th.App.RemoveUserFromChannel(th.Context, initialUsers[i].Id, th.SystemAdminUser.Id, channel)
require.Nil(t, appErr)
}
// Add 5 new users
newUsers := make([]*model.User, 5)
for i := range 5 {
newUsers[i] = th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, newUsers[i].Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, newUsers[i], channel, false)
require.Nil(t, appErr)
}
// Sync mixed changes
previousMessages := atomic.LoadInt32(&totalSyncMessages)
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for mixed changes sync to complete
require.Eventually(t, func() bool {
messages := atomic.LoadInt32(&totalSyncMessages)
removes := atomic.LoadInt32(&removeOperations)
return messages > previousMessages && removes >= 3
}, 10*time.Second, 100*time.Millisecond, "Should sync mixed changes")
// Verify cursor was updated after mixed operations sync
require.Eventually(t, func() bool {
finalScr, finalErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
return finalErr == nil && finalScr.LastMembersSyncAt > cursorAfterInitial
}, 10*time.Second, 100*time.Millisecond, "Cursor should be updated after mixed operations sync")
// Verify final state
members, membersErr := ss.Channel().GetMembers(model.ChannelMembersGetOptions{
ChannelID: channel.Id,
Limit: 100,
})
require.NoError(t, membersErr)
expectedMembers := 10 - 3 + 5 + 1 // initial - removed + added + system admin
assert.Equal(t, expectedMembers, len(members), "Should have correct final member count")
finalMessages := atomic.LoadInt32(&totalSyncMessages)
finalAdds := atomic.LoadInt32(&addOperations)
finalRemoves := atomic.LoadInt32(&removeOperations)
assert.Greater(t, finalMessages, int32(0), "Should have sync messages")
assert.Greater(t, finalAdds, int32(0), "Should have add operations")
assert.GreaterOrEqual(t, finalRemoves, int32(3), "Should have remove operations")
})
t.Run("Test 6: Multiple remote clusters", func(t *testing.T) {
// This test verifies membership sync across multiple remote clusters:
// 1. Adding users syncs to all 3 remote clusters
// 2. Changes from one cluster propagate through our server to other clusters
// 3. Removals sync to all clusters
EnsureCleanState(t, th, ss)
var totalSyncMessages int32
var syncMessagesPerCluster = make(map[string]*int32)
// Create multiple test HTTP servers to simulate different remote clusters
clusters := make([]*model.RemoteCluster, 3)
testServers := make([]*httptest.Server, 3)
syncHandlers := make([]*SelfReferentialSyncHandler, 3)
// Create 3 remote clusters and their servers
for i := range 3 {
clusterName := fmt.Sprintf("cluster-%d", i+1)
var count int32
syncMessagesPerCluster[clusterName] = &count
// Create sync handler for this cluster
syncHandlers[i] = &SelfReferentialSyncHandler{
t: t,
service: service,
selfCluster: nil, // Will be set later
syncMessageCount: &totalSyncMessages,
}
// Create test server for this cluster
idx := i // Capture index for closure
testServers[i] = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/v4/remotecluster/msg" {
clusterName := fmt.Sprintf("cluster-%d", idx+1)
atomic.AddInt32(syncMessagesPerCluster[clusterName], 1)
// Read body
bodyBytes, readErr := io.ReadAll(r.Body)
if readErr != nil {
writeOKResponse(w)
return
}
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
// Handle with the appropriate sync handler
if clusters[idx] != nil {
syncHandlers[idx].selfCluster = clusters[idx]
syncHandlers[idx].HandleRequest(w, r)
return
}
}
writeOKResponse(w)
}))
}
// Cleanup servers
defer func() {
for _, server := range testServers {
server.Close()
}
}()
// Create a new team for this test to avoid team member limits
team := th.CreateTeam()
// Create and share channel in the new team
channel := &model.Channel{
TeamId: team.Id,
Name: "multi-cluster-test-channel",
DisplayName: "Multi Cluster Test Channel",
Type: model.ChannelTypeOpen,
}
var appErr *model.AppError
channel, appErr = th.App.CreateChannel(th.Context, channel, false)
require.Nil(t, appErr)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: team.Id,
Home: true,
ShareName: "multi-cluster-test",
CreatorId: th.BasicUser.Id,
RemoteId: "", // Empty means sync to all remotes
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create remote clusters
for i := range 3 {
clusters[i] = &model.RemoteCluster{
RemoteId: model.NewId(),
Name: fmt.Sprintf("cluster-%d", i+1),
SiteURL: testServers[i].URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
clusters[i], err = ss.RemoteCluster().Save(clusters[i])
require.NoError(t, err)
// Share channel with this cluster
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: clusters[i].RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
}
// Add users to channel - they should sync to all remote clusters
users := make([]*model.User, 5)
for i := range 5 {
users[i] = th.CreateUser()
_, _, addErr := th.App.AddUserToTeam(th.Context, team.Id, users[i].Id, th.BasicUser.Id)
require.Nil(t, addErr)
_, addErr = th.App.AddUserToChannel(th.Context, users[i], channel, false)
require.Nil(t, addErr)
}
// Sync to all clusters - need to sync each one individually
for _, cluster := range clusters {
err = service.SyncAllChannelMembers(channel.Id, cluster.RemoteId, nil)
require.NoError(t, err)
}
// Wait for syncs to complete
require.Eventually(t, func() bool {
// Each cluster should receive at least 5 sync messages (one per user)
for _, countPtr := range syncMessagesPerCluster {
if atomic.LoadInt32(countPtr) < 5 {
return false
}
}
return true
}, 10*time.Second, 100*time.Millisecond, "All clusters should receive sync messages")
// Verify each cluster received messages
for name, countPtr := range syncMessagesPerCluster {
finalCount := atomic.LoadInt32(countPtr)
assert.GreaterOrEqual(t, finalCount, int32(5),
"Cluster %s should receive at least 5 sync messages", name)
}
// Part 2: Test propagation from one cluster through another
// This simulates cluster-2 receiving a membership change and propagating it
// Reset counters
atomic.StoreInt32(&totalSyncMessages, 0)
for _, countPtr := range syncMessagesPerCluster {
atomic.StoreInt32(countPtr, 0)
}
// Create a new user that will be added "by cluster-2"
userFromCluster2 := th.CreateUser()
_, _, appErr = th.App.AddUserToTeam(th.Context, team.Id, userFromCluster2.Id, th.BasicUser.Id)
require.Nil(t, appErr)
// Create a sync message as if it came from cluster-2 adding this user
syncMsg := model.NewSyncMsg(channel.Id)
syncMsg.MembershipChanges = []*model.MembershipChangeMsg{
{
ChannelId: channel.Id,
UserId: userFromCluster2.Id,
IsAdd: true,
RemoteId: clusters[1].RemoteId, // from cluster-2
ChangeTime: model.GetMillis(),
},
}
// Wrap it in a RemoteClusterMsg
payload, payloadErr := syncMsg.ToJSON()
require.NoError(t, payloadErr)
rcMsg := model.RemoteClusterMsg{
Topic: sharedchannel.TopicSync,
Payload: payload,
}
// Simulate cluster-2 sending this change to our server
// This should trigger our server to propagate to other clusters
response := &remotecluster.Response{}
err = service.OnReceiveSyncMessageForTesting(rcMsg, clusters[1], response)
require.NoError(t, err)
// Our server should now propagate this change to cluster-1 and cluster-3
// Wait for the propagation to happen
require.Eventually(t, func() bool {
count3 := atomic.LoadInt32(syncMessagesPerCluster["cluster-3"])
// We expect at least cluster-3 to receive the propagated change
return count3 >= 1
}, 10*time.Second, 100*time.Millisecond, "Change should propagate to other clusters")
// Verify the user was added locally
member, memberErr := ss.Channel().GetMember(th.Context, channel.Id, userFromCluster2.Id)
require.NoError(t, memberErr, "User should be a member after receiving sync from cluster-2")
require.Equal(t, userFromCluster2.Id, member.UserId)
// Verify cluster-3 received the propagated change
finalCount3 := atomic.LoadInt32(syncMessagesPerCluster["cluster-3"])
assert.GreaterOrEqual(t, finalCount3, int32(1),
"cluster-3 should receive propagated sync from our server")
// Part 3: Test removal syncing to all clusters
// Reset counters
atomic.StoreInt32(&totalSyncMessages, 0)
for _, countPtr := range syncMessagesPerCluster {
atomic.StoreInt32(countPtr, 0)
}
// Remove a user - should sync to all clusters
appErr = th.App.RemoveUserFromChannel(th.Context, users[0].Id, th.SystemAdminUser.Id, channel)
require.Nil(t, appErr)
// Sync removal to all clusters
for _, cluster := range clusters {
err = service.SyncAllChannelMembers(channel.Id, cluster.RemoteId, nil)
require.NoError(t, err)
}
// Wait for removal sync
require.Eventually(t, func() bool {
perCluster := make(map[string]int32)
for name, countPtr := range syncMessagesPerCluster {
perCluster[name] = atomic.LoadInt32(countPtr)
}
// Each cluster should receive at least 1 sync message for the removal
allClustersReceived := true
for _, count := range perCluster {
if count < 1 {
allClustersReceived = false
break
}
}
return allClustersReceived
}, 10*time.Second, 100*time.Millisecond, "All clusters should receive removal sync")
})
t.Run("Test 7: Feature flag disabled", func(t *testing.T) {
// This test verifies that the shared channel membership sync functionality respects the feature flag.
// It tests two scenarios:
// 1. When the feature flag is disabled, no sync messages should be sent even when SyncAllChannelMembers is called
// 2. When the feature flag is enabled, sync messages should be sent as expected
// This ensures that the feature can be safely disabled in production without triggering unintended syncs
EnsureCleanState(t, th, ss)
var syncMessageCount int32
// Disable feature flag from the beginning to prevent any automatic sync
os.Setenv("MM_FEATUREFLAGS_ENABLESHAREDCHANNELMEMBERSYNC", "false")
rErr := th.App.ReloadConfig()
require.NoError(t, rErr)
// Create test HTTP server that counts sync messages
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/v4/remotecluster/msg" {
atomic.AddInt32(&syncMessageCount, 1)
}
writeOKResponse(w)
}))
defer testServer.Close()
// Create and share channel
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "feature-flag-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create self-referential remote cluster
selfCluster := &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-flag-test",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Test 1: Sync with feature flag disabled
th.App.UpdateConfig(func(cfg *model.Config) {
cfg.FeatureFlags.EnableSharedChannelsMemberSync = false
})
// Add users to the channel after disabling the feature flag
for range 3 {
user := th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
}
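// Reset the counter so only messages produced by this sync call are counted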
atomic.StoreInt32(&syncMessageCount, 0)
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Verify no sync messages were sent
require.Never(t, func() bool {
return atomic.LoadInt32(&syncMessageCount) > 0
}, 2*time.Second, 100*time.Millisecond, "No sync should occur with feature flag disabled")
// Test 2: Sync with feature flag enabled
th.App.UpdateConfig(func(cfg *model.Config) {
cfg.FeatureFlags.EnableSharedChannelsMemberSync = true
})
atomic.StoreInt32(&syncMessageCount, 0)
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Verify sync messages were sent
require.Eventually(t, func() bool {
return atomic.LoadInt32(&syncMessageCount) > 0
}, 5*time.Second, 100*time.Millisecond, "Sync should occur with feature flag enabled")
})
t.Run("Test 8: Sync Task After Connection Becomes Available", func(t *testing.T) {
// This test verifies that membership sync works correctly when a remote cluster
// reconnects after being offline. We simulate a cluster that has been unavailable
// (old LastPingAt timestamp) and then comes back online.
// Expected behavior:
// 1. Cluster is created with old LastPingAt (simulating previous offline state)
// 2. When LastPingAt is updated (cluster comes online), sync should work
// 3. All channel members should be synced successfully
// 4. Cursor (LastMembersSyncAt) should be updated after successful sync
EnsureCleanState(t, th, ss)
var syncTaskCreated atomic.Bool
var syncHandler *SelfReferentialSyncHandler
// Create test HTTP server
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
} else {
writeOKResponse(w)
}
}))
defer testServer.Close()
// Create channel and share it
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "reconnect-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create remote cluster that was previously offline (old LastPingAt)
selfCluster := &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-reconnect",
SiteURL: testServer.URL,
CreateAt: model.GetMillis() - 300000, // Created 5 minutes ago
LastPingAt: model.GetMillis() - 120000, // Last ping 2 minutes ago
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Initialize sync handler
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
syncHandler.OnBatchSync = func(userIds []string, messageNumber int32) {
syncTaskCreated.Store(true)
}
// Add some users to sync
for range 3 {
user := th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
}
// Update LastPingAt to simulate cluster coming back online
selfCluster.LastPingAt = model.GetMillis()
_, err = ss.RemoteCluster().Update(selfCluster)
require.NoError(t, err)
// Trigger membership sync as would happen when connection is restored
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Verify sync task was created and executed with more generous timeout
require.Eventually(t, func() bool {
return syncTaskCreated.Load()
}, 15*time.Second, 200*time.Millisecond, "Sync should execute when cluster comes back online")
// Wait for async task queue to be processed
require.Eventually(t, func() bool {
return !service.HasPendingTasksForTesting()
}, 10*time.Second, 200*time.Millisecond, "All async sync tasks should be completed")
// Verify cursor was updated with extended timeout
require.Eventually(t, func() bool {
updatedScr, scrErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
return scrErr == nil && updatedScr.LastMembersSyncAt > 0
}, 20*time.Second, 200*time.Millisecond, "Cursor should be updated after sync")
})
t.Run("Test 9: Remote Cluster Offline During Sync", func(t *testing.T) {
// This test verifies graceful failure handling when a remote cluster goes offline
// during the sync operation. We simulate a server that works initially but then
// becomes unavailable during subsequent sync attempts.
// Expected behavior:
// 1. First sync succeeds and updates the cursor
// 2. Server goes offline for the second sync attempt
// 3. Second sync fails gracefully without errors
// 4. Cursor remains at the value from the successful first sync (no corruption)
// 5. No partial data is persisted from the failed sync
EnsureCleanState(t, th, ss)
var syncAttempts int32
var serverOnline atomic.Bool
serverOnline.Store(true)
var syncHandler *SelfReferentialSyncHandler
// Create test HTTP server that can simulate going offline
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if !serverOnline.Load() {
// Simulate server being offline
w.WriteHeader(http.StatusServiceUnavailable)
return
}
if r.URL.Path == "/api/v4/remotecluster/msg" {
currentAttempt := atomic.AddInt32(&syncAttempts, 1)
// On second sync cycle, go offline (allow first full sync to complete)
if currentAttempt > 2 {
serverOnline.Store(false)
w.WriteHeader(http.StatusServiceUnavailable)
return
}
// First sync succeeds - use sync handler if available for all requests in first sync
if syncHandler != nil {
syncHandler.HandleRequest(w, r)
return
}
}
writeOKResponse(w)
}))
defer testServer.Close()
// Create channel and share it
channel := th.CreateChannel(th.Context, th.BasicTeam)
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
Home: true,
ShareName: "offline-test",
CreatorId: th.BasicUser.Id,
RemoteId: "",
}
_, shareErr := th.App.ShareChannel(th.Context, sc)
require.NoError(t, shareErr)
// Create remote cluster
selfCluster := &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-offline",
SiteURL: testServer.URL,
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Share channel with self
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
// Add users to sync - use more than batch size to test batch sync
// Default batch size is 20, so use 25 users to ensure batch processing
for range 25 {
user := th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
}
// Initialize sync handler
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
// First sync should succeed
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
// Wait for first sync with more generous timeout
require.Eventually(t, func() bool {
return atomic.LoadInt32(&syncAttempts) >= 1
}, 15*time.Second, 200*time.Millisecond, "Should complete first sync")
// Wait for cursor to be updated after first sync
var firstCursor int64
require.Eventually(t, func() bool {
scr1, scrErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
if scrErr != nil {
return false
}
firstCursor = scr1.LastMembersSyncAt
return firstCursor > 0
}, 10*time.Second, 200*time.Millisecond, "Cursor should be set after first sync")
// Add more users to ensure we still have > 20 total for batch sync
for range 5 {
user := th.CreateUser()
_, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
require.Nil(t, appErr)
_, appErr = th.App.AddUserToChannel(th.Context, user, channel, false)
require.Nil(t, appErr)
}
// Second sync should fail (server goes offline)
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err) // Method itself shouldn't error
// Wait for second sync attempt with more generous timeout
require.Eventually(t, func() bool {
return atomic.LoadInt32(&syncAttempts) >= 2
}, 20*time.Second, 200*time.Millisecond, "Should attempt second sync")
// Wait for any cursor updates to complete and verify cursor was not updated
require.Never(t, func() bool {
scr2, scrErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
if scrErr != nil {
return false
}
return scr2.LastMembersSyncAt > firstCursor
}, 5*time.Second, 200*time.Millisecond, "Cursor should not update when sync fails")
})
t.Run("Test 10: Users in Multiple Shared Channels", func(t *testing.T) {
// This test verifies deduplication when users belong to multiple shared channels.
// Unlike global user sync which syncs users once regardless of channel membership,
// membership sync operates per channel and must handle users in multiple channels.
// Test setup:
// - user1: member of all 3 shared channels
// - user2: member of 2 shared channels (channel1 and channel2)
// - user3: member of 1 shared channel (channel3)
// Expected behavior:
// - Each user is synced exactly once per channel they belong to
// - No duplicate sync messages for the same user in the same channel
// - Users not in a channel are not synced for that channel
EnsureCleanState(t, th, ss)
var syncedChannelUsers = make(map[string][]string) // channelId -> userIds
var mu sync.Mutex
var syncHandler *SelfReferentialSyncHandler
var testServer *httptest.Server
var totalSyncMessages int32
// Create users
user1 := th.CreateUser()
user2 := th.CreateUser()
user3 := th.CreateUser()
// Add users to team
th.LinkUserToTeam(user1, th.BasicTeam)
th.LinkUserToTeam(user2, th.BasicTeam)
th.LinkUserToTeam(user3, th.BasicTeam)
// Create multiple shared channels
channel1 := th.CreateChannel(th.Context, th.BasicTeam)
channel2 := th.CreateChannel(th.Context, th.BasicTeam)
channel3 := th.CreateChannel(th.Context, th.BasicTeam)
// Add users to multiple shared channels
// user1 in all channels
th.AddUserToChannel(user1, channel1)
th.AddUserToChannel(user1, channel2)
th.AddUserToChannel(user1, channel3)
// user2 in two channels
th.AddUserToChannel(user2, channel1)
th.AddUserToChannel(user2, channel2)
// user3 in one channel
th.AddUserToChannel(user3, channel3)
// First create the remote cluster with a placeholder URL
selfCluster := &model.RemoteCluster{
RemoteId: model.NewId(),
Name: "self-cluster-multi-channel",
SiteURL: "http://placeholder",
CreateAt: model.GetMillis(),
LastPingAt: model.GetMillis(),
Token: model.NewId(),
CreatorId: th.BasicUser.Id,
RemoteToken: model.NewId(),
}
selfCluster, err = ss.RemoteCluster().Save(selfCluster)
require.NoError(t, err)
// Initialize sync handler before creating the test server
syncHandler = NewSelfReferentialSyncHandler(t, service, selfCluster)
// Create a wrapper handler to intercept sync messages
testServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Intercept and track channel syncs
if r.URL.Path == "/api/v4/remotecluster/msg" {
bodyBytes, pErr := io.ReadAll(r.Body)
if pErr == nil {
// Restore body for actual handler
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
var frame model.RemoteClusterFrame
if json.Unmarshal(bodyBytes, &frame) == nil {
var syncMsg model.SyncMsg
if json.Unmarshal(frame.Msg.Payload, &syncMsg) == nil {
mu.Lock()
channelId := syncMsg.ChannelId
if channelId != "" {
if _, ok := syncedChannelUsers[channelId]; !ok {
syncedChannelUsers[channelId] = []string{}
}
// Add all users from membership changes
for _, change := range syncMsg.MembershipChanges {
if change.IsAdd {
syncedChannelUsers[channelId] = append(syncedChannelUsers[channelId], change.UserId)
atomic.AddInt32(&totalSyncMessages, 1)
}
}
}
mu.Unlock()
}
}
}
}
// Call the actual sync handler
syncHandler.HandleRequest(w, r)
}))
defer testServer.Close()
// Update the cluster with the actual test server URL
selfCluster.SiteURL = testServer.URL
_, err = ss.RemoteCluster().Update(selfCluster)
require.NoError(t, err)
// Make channels shared
for i, channel := range []*model.Channel{channel1, channel2, channel3} {
sc := &model.SharedChannel{
ChannelId: channel.Id,
TeamId: th.BasicTeam.Id,
RemoteId: selfCluster.RemoteId,
Home: true,
ReadOnly: false,
ShareName: fmt.Sprintf("channel%d", i+1),
CreatorId: th.BasicUser.Id,
}
_, err = ss.SharedChannel().Save(sc)
require.NoError(t, err)
scr := &model.SharedChannelRemote{
Id: model.NewId(),
ChannelId: channel.Id,
CreatorId: th.BasicUser.Id,
RemoteId: selfCluster.RemoteId,
IsInviteAccepted: true,
IsInviteConfirmed: true,
LastMembersSyncAt: 0,
}
_, err = ss.SharedChannel().SaveRemote(scr)
require.NoError(t, err)
}
// Sync memberships for each channel separately
for _, channel := range []*model.Channel{channel1, channel2, channel3} {
err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
require.NoError(t, err)
}
// Ensure the sync handler is ready by waiting for the first message
require.Eventually(t, func() bool {
return atomic.LoadInt32(&totalSyncMessages) > 0
}, 10*time.Second, 50*time.Millisecond, "Expected at least one sync message to be sent")
// Calculate expected number of sync messages
// channel1: user1, user2, BasicUser = 3
// channel2: user1, user2, BasicUser = 3
// channel3: user1, user3, BasicUser = 3
// Total: 9 sync messages (one per user per channel)
expectedSyncMessages := int32(9)
// Wait for all sync messages to be processed with detailed debugging
require.Eventually(t, func() bool {
currentMessages := atomic.LoadInt32(&totalSyncMessages)
mu.Lock()
channelCount := len(syncedChannelUsers)
var totalUsers int
for _, users := range syncedChannelUsers {
totalUsers += len(users)
}
mu.Unlock()
// Log progress for debugging
if currentMessages < expectedSyncMessages {
t.Logf("Waiting for sync messages: %d/%d received, %d channels synced, %d total users",
currentMessages, expectedSyncMessages, channelCount, totalUsers)
}
return currentMessages >= expectedSyncMessages
}, 30*time.Second, 200*time.Millisecond,
fmt.Sprintf("Expected %d sync messages, but got %d", expectedSyncMessages, atomic.LoadInt32(&totalSyncMessages)))
// Verify we have complete data for all channels
require.Eventually(t, func() bool {
mu.Lock()
defer mu.Unlock()
// Verify we have sync data for all 3 channels
if len(syncedChannelUsers) != 3 {
return false
}
// Verify each channel has the expected number of users
channel1Users, ok1 := syncedChannelUsers[channel1.Id]
channel2Users, ok2 := syncedChannelUsers[channel2.Id]
channel3Users, ok3 := syncedChannelUsers[channel3.Id]
if !ok1 || !ok2 || !ok3 {
return false
}
// Count unique users per channel
channel1Count := len(getUniqueUsers(channel1Users))
channel2Count := len(getUniqueUsers(channel2Users))
channel3Count := len(getUniqueUsers(channel3Users))
// channel1 should have user1, user2, BasicUser = 3
// channel2 should have user1, user2, BasicUser = 3
// channel3 should have user1, user3, BasicUser = 3
expectedCounts := channel1Count == 3 && channel2Count == 3 && channel3Count == 3
if !expectedCounts {
t.Logf("Channel user counts - channel1: %d, channel2: %d, channel3: %d",
channel1Count, channel2Count, channel3Count)
}
return expectedCounts
}, 30*time.Second, 200*time.Millisecond, "Expected all channels to have their users synced")
// Verify each user is synced exactly once per channel they belong to
mu.Lock()
defer mu.Unlock()
// Verify channel1 synced user1, user2, and BasicUser
channel1Users := syncedChannelUsers[channel1.Id]
userCount1 := make(map[string]int)
for _, userId := range channel1Users {
userCount1[userId]++
}
assert.Equal(t, 1, userCount1[user1.Id], "User1 should be synced exactly once for channel1")
assert.Equal(t, 1, userCount1[user2.Id], "User2 should be synced exactly once for channel1")
assert.Equal(t, 0, userCount1[user3.Id], "User3 should not be synced for channel1")
// Verify channel2 synced user1, user2, and BasicUser
channel2Users := syncedChannelUsers[channel2.Id]
userCount2 := make(map[string]int)
for _, userId := range channel2Users {
userCount2[userId]++
}
assert.Equal(t, 1, userCount2[user1.Id], "User1 should be synced exactly once for channel2")
assert.Equal(t, 1, userCount2[user2.Id], "User2 should be synced exactly once for channel2")
assert.Equal(t, 0, userCount2[user3.Id], "User3 should not be synced for channel2")
// Verify channel3 synced user1, user3, and BasicUser
channel3Users := syncedChannelUsers[channel3.Id]
userCount3 := make(map[string]int)
for _, userId := range channel3Users {
userCount3[userId]++
}
assert.Equal(t, 1, userCount3[user1.Id], "User1 should be synced exactly once for channel3")
assert.Equal(t, 0, userCount3[user2.Id], "User2 should not be synced for channel3")
assert.Equal(t, 1, userCount3[user3.Id], "User3 should be synced exactly once for channel3")
})
// t.Run("Test 11: Strengthened conflict resolution", func(t *testing.T) {
// // This test verifies robust handling of conflicting membership states between multiple clusters.
// // Strengthened conflict scenarios tested:
// // 1. Basic conflict: user removed from one cluster but not another
// // 2. Re-addition after partial sync: user re-added after being synced to only one cluster
// // 3. Cursor consistency: verify sync cursors are properly updated across conflict resolution
// // 4. Post-conflict operations: ensure system works correctly after resolving conflicts
// // The test ensures the system correctly resolves conflicts and maintains data consistency
// EnsureCleanState(t, th, ss)
// var syncMessages []model.SyncMsg
// var mu sync.Mutex
// var syncMessageCount int32
// var selfCluster *model.RemoteCluster
// // Create sync handler
// syncHandler := NewSelfReferentialSyncHandler(t, service, nil)
// // Create test HTTP server that tracks sync messages
// testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// if r.URL.Path == "/api/v4/remotecluster/msg" {
// atomic.AddInt32(&syncMessageCount, 1)
// // Read body once
// bodyBytes, readErr := io.ReadAll(r.Body)
// if readErr != nil {
// writeOKResponse(w)
// return
// }
// r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
// // Capture all sync messages - parse as RemoteClusterFrame
// var frame model.RemoteClusterFrame
// if unmarshalErr := json.Unmarshal(bodyBytes, &frame); unmarshalErr == nil {
// var syncMsg model.SyncMsg
// if payloadErr := json.Unmarshal(frame.Msg.Payload, &syncMsg); payloadErr == nil {
// mu.Lock()
// syncMessages = append(syncMessages, syncMsg)
// mu.Unlock()
// }
// }
// // Handle with sync handler for proper response
// if selfCluster != nil {
// syncHandler.selfCluster = selfCluster
// syncHandler.HandleRequest(w, r)
// return
// }
// }
// writeOKResponse(w)
// }))
// defer testServer.Close()
// // Create and share channel
// channel := th.CreateChannel(th.Context, th.BasicTeam)
// sc := &model.SharedChannel{
// ChannelId: channel.Id,
// TeamId: th.BasicTeam.Id,
// Home: true,
// ShareName: "strengthened-conflict-test",
// CreatorId: th.BasicUser.Id,
// RemoteId: "",
// }
// _, shareErr := th.App.ShareChannel(th.Context, sc)
// require.NoError(t, shareErr)
// // Create self-referential remote cluster
// selfCluster = &model.RemoteCluster{
// RemoteId: model.NewId(),
// Name: "self-cluster-strengthened-conflict",
// SiteURL: testServer.URL,
// CreateAt: model.GetMillis(),
// LastPingAt: model.GetMillis(),
// Token: model.NewId(),
// CreatorId: th.BasicUser.Id,
// RemoteToken: model.NewId(),
// }
// selfCluster, err = ss.RemoteCluster().Save(selfCluster)
// require.NoError(t, err)
// // Share channel with self
// scr := &model.SharedChannelRemote{
// Id: model.NewId(),
// ChannelId: channel.Id,
// CreatorId: th.BasicUser.Id,
// RemoteId: selfCluster.RemoteId,
// IsInviteAccepted: true,
// IsInviteConfirmed: true,
// LastMembersSyncAt: 0,
// }
// _, err = ss.SharedChannel().SaveRemote(scr)
// require.NoError(t, err)
// // Phase 1: Create users for conflict scenarios
// conflictUser1 := th.CreateUser()
// conflictUser2 := th.CreateUser()
// // Add users to team
// for _, user := range []*model.User{conflictUser1, conflictUser2} {
// _, _, appErr := th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, user.Id, th.BasicUser.Id)
// require.Nil(t, appErr)
// }
// // Add users to channel initially
// for _, user := range []*model.User{conflictUser1, conflictUser2} {
// _, appErr := th.App.AddUserToChannel(th.Context, user, channel, false)
// require.Nil(t, appErr)
// }
// // Verify users are initially members
// for _, user := range []*model.User{conflictUser1, conflictUser2} {
// _, initialErr := ss.Channel().GetMember(context.Background(), channel.Id, user.Id)
// require.NoError(t, initialErr, "User should be initially a member")
// }
// // Phase 2: Initial sync
// err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
// require.NoError(t, err)
// // Wait for initial sync to complete
// require.Eventually(t, func() bool {
// count := atomic.LoadInt32(&syncMessageCount)
// return count > 0
// }, 15*time.Second, 200*time.Millisecond, "Should have initial sync messages")
// // Get initial cursor
// initialScr, scrErr := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
// require.NoError(t, scrErr)
// initialCursor := initialScr.LastMembersSyncAt
// // Phase 3: Create conflict by removing user locally
// // Temporarily disable automatic sync to prevent interference with manual operations
// th.App.UpdateConfig(func(cfg *model.Config) {
// cfg.FeatureFlags.EnableSharedChannelsMemberSync = false
// })
// rErr := th.App.ReloadConfig()
// require.NoError(t, rErr)
// // Remove conflictUser1 locally
// appErr := th.App.RemoveUserFromChannel(th.Context, conflictUser1.Id, th.SystemAdminUser.Id, channel)
// require.Nil(t, appErr, "Failed to remove conflictUser1")
// // Wait for removal to complete
// require.Eventually(t, func() bool {
// _, err1 := ss.Channel().GetMember(context.Background(), channel.Id, conflictUser1.Id)
// return err1 != nil // User should not be found
// }, 15*time.Second, 200*time.Millisecond, "User should be removed from channel")
// // Phase 4: Re-add user (creating conflict state)
// // Re-add conflictUser1 (simulating conflict between local state and remote state)
// _, appErr = th.App.AddUserToChannel(th.Context, conflictUser1, channel, false)
// require.Nil(t, appErr)
// // Re-enable automatic sync for the conflict resolution phase
// th.App.UpdateConfig(func(cfg *model.Config) {
// cfg.FeatureFlags.EnableSharedChannelsMemberSync = true
// })
// rErr = th.App.ReloadConfig()
// require.NoError(t, rErr)
// // Wait for local operation to complete
// require.Eventually(t, func() bool {
// _, err1 := ss.Channel().GetMember(context.Background(), channel.Id, conflictUser1.Id)
// return err1 == nil // User should be found
// }, 10*time.Second, 200*time.Millisecond, "User should be locally a member after re-adding")
// // Phase 5: Conflict resolution sync
// // Reset message tracking for conflict resolution phase
// atomic.StoreInt32(&syncMessageCount, 0)
// mu.Lock()
// syncMessages = []model.SyncMsg{}
// mu.Unlock()
// err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
// require.NoError(t, err)
// // Wait for conflict resolution sync to complete
// require.Eventually(t, func() bool {
// count := atomic.LoadInt32(&syncMessageCount)
// return count > 0
// }, 20*time.Second, 200*time.Millisecond, "Should receive conflict resolution sync messages")
// // Phase 6: Verification of conflict resolution
// mu.Lock()
// totalMessages := len(syncMessages)
// mu.Unlock()
// // Verify we received sync messages for conflict resolution
// assert.Greater(t, totalMessages, 0, "Should have sync messages for conflict resolution")
// // Verify final membership state is consistent
// expectedMembers := map[string]bool{
// conflictUser1.Id: true, // Re-added, should be member
// conflictUser2.Id: true, // Never removed, should be member
// th.BasicUser.Id: true, // Basic user, should be member
// }
// for userId, shouldBeMember := range expectedMembers {
// require.Eventually(t, func() bool {
// _, memberErr := ss.Channel().GetMember(context.Background(), channel.Id, userId)
// if shouldBeMember {
// return memberErr == nil
// }
// return memberErr != nil
// }, 10*time.Second, 200*time.Millisecond,
// "User %s membership state should be consistent after conflict resolution", userId)
// }
// // Phase 7: Verify cursor updates
// require.Eventually(t, func() bool {
// scrUpdated, err1 := ss.SharedChannel().GetRemoteByIds(channel.Id, selfCluster.RemoteId)
// return err1 == nil && scrUpdated.LastMembersSyncAt > initialCursor
// }, 15*time.Second, 200*time.Millisecond, "Cluster should have updated sync cursor")
// // Phase 8: Test that new operations after conflict resolution work correctly
// newUser := th.CreateUser()
// _, _, appErr = th.App.AddUserToTeam(th.Context, th.BasicTeam.Id, newUser.Id, th.BasicUser.Id)
// require.Nil(t, appErr)
// _, appErr = th.App.AddUserToChannel(th.Context, newUser, channel, false)
// require.Nil(t, appErr)
// // Reset and sync this new user
// atomic.StoreInt32(&syncMessageCount, 0)
// mu.Lock()
// syncMessages = []model.SyncMsg{}
// mu.Unlock()
// err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
// require.NoError(t, err)
// // Verify the new user is synced correctly
// require.Eventually(t, func() bool {
// mu.Lock()
// defer mu.Unlock()
// // Check if the new user appears in any sync message
// for _, msg := range syncMessages {
// for _, change := range msg.MembershipChanges {
// if change.UserId == newUser.Id && change.IsAdd {
// return true
// }
// }
// }
// return false
// }, 15*time.Second, 200*time.Millisecond, "New user should be synced correctly after conflict resolution")
// // Phase 9: Verify efficiency - no redundant syncs for existing members
// atomic.StoreInt32(&syncMessageCount, 0)
// mu.Lock()
// syncMessages = []model.SyncMsg{}
// mu.Unlock()
// // Trigger another sync - should not send messages for already-synced members
// err = service.SyncAllChannelMembers(channel.Id, selfCluster.RemoteId, nil)
// require.NoError(t, err)
// // Wait for sync completion and verify minimal activity
// // Give time for any sync to complete, then check the final count
// require.Eventually(t, func() bool {
// finalCount := atomic.LoadInt32(&syncMessageCount)
// // Should have minimal activity since all members are already synced
// return finalCount <= 1
// }, 10*time.Second, 200*time.Millisecond, "Should have minimal sync activity for already-synced members")
// })
}

// getUniqueUsers returns the set of distinct user IDs from a slice that may contain duplicates.
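// For example (hypothetical input), getUniqueUsers([]string{"u1", "u1", "u2"}) returns
// map[string]bool{"u1": true, "u2": true}; len() of the result is the distinct-user count
// used by the per-channel assertions above.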
func getUniqueUsers(users []string) map[string]bool {
unique := make(map[string]bool)
for _, user := range users {
unique[user] = true
}
return unique
}