// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.

package jobs

import (
	"net/http"
	"time"

	"github.com/mattermost/mattermost/server/public/model"
	"github.com/mattermost/mattermost/server/public/shared/mlog"
	"github.com/mattermost/mattermost/server/public/shared/request"
	"github.com/mattermost/mattermost/server/v8/channels/store"
)

type BatchMigrationWorkerAppIFace interface {
	GetClusterStatus(rctx request.CTX) ([]*model.ClusterInfo, error)
}

// BatchMigrationWorker processes database migration jobs in batches to help avoid table locks.
//
// It uses the jobs infrastructure to ensure only one node in the cluster runs the migration at
// any given time, avoids running the migration until the cluster is uniform, and automatically
// resets the migration if the cluster version diverges after starting.
//
// In principle, the job infrastructure is overkill for this kind of work, as a separate worker is
// created per migration. There are also complications in edge cases, such as having to restart the
// server in order to retry a failed migration job. Refactoring the job infrastructure is left as
// a future exercise.
type BatchMigrationWorker struct {
	*BatchWorker

	app          BatchMigrationWorkerAppIFace
	migrationKey string

	// doMigrationBatch processes a single batch, receiving the job's current cursor data and
	// returning the updated data, a flag indicating whether the migration is complete, and any error.
	doMigrationBatch func(data model.StringMap, store store.Store) (model.StringMap, bool, error)
}

// MakeBatchMigrationWorker creates a worker to process the given migration batch function.
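//
// A minimal usage sketch; jobServer, st, app, the migration key, the "last_key" cursor
// field, and migrateNextBatch are illustrative assumptions, not part of this package:
//
//	worker := MakeBatchMigrationWorker(jobServer, st, app, "custom_migration_complete", time.Second,
//		func(data model.StringMap, s store.Store) (model.StringMap, bool, error) {
//			if data == nil {
//				data = make(model.StringMap)
//			}
//			// Resume from the cursor persisted in the job data, process one bounded
//			// batch, then return the advanced cursor and whether the migration is done.
//			nextKey, done, err := migrateNextBatch(s, data["last_key"]) // hypothetical helper
//			if err != nil {
//				return nil, false, err
//			}
//			data["last_key"] = nextKey
//			return data, done, nil
//		})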
func MakeBatchMigrationWorker(
	jobServer *JobServer,
	store store.Store,
	app BatchMigrationWorkerAppIFace,
	migrationKey string,
	timeBetweenBatches time.Duration,
	doMigrationBatch func(data model.StringMap, store store.Store) (model.StringMap, bool, error),
) *BatchMigrationWorker {
	worker := &BatchMigrationWorker{
		app:              app,
		migrationKey:     migrationKey,
		doMigrationBatch: doMigrationBatch,
	}
	worker.BatchWorker = MakeBatchWorker(jobServer, store, timeBetweenBatches, worker.doBatch)
	return worker
}

func (worker *BatchMigrationWorker) doBatch(rctx request.CTX, job *model.Job) bool {
	// Ensure the cluster remains in sync, otherwise we restart the job to
	// ensure a complete migration. Technically, the cluster could go out of
	// sync briefly within a batch, but we accept that risk.
	if !worker.checkIsClusterInSync(rctx) {
		worker.logger.Warn("Worker: Resetting job")
		worker.resetJob(worker.logger, job)
		return true
	}

	nextData, done, err := worker.doMigrationBatch(job.Data, worker.store)
	if err != nil {
		worker.logger.Error("Worker: Failed to do migration batch. Exiting", mlog.Err(err))
		worker.setJobError(worker.logger, job, model.NewAppError("doMigrationBatch", model.NoTranslation, nil, "", http.StatusInternalServerError).Wrap(err))
		return true
	} else if done {
		worker.logger.Info("Worker: Job is complete")
		worker.setJobSuccess(worker.logger, job)
		worker.markAsComplete()
		return true
	}

	job.Data = nextData

	// Migrations currently don't support reporting meaningful progress.
	if err := worker.jobServer.SetJobProgress(job, 0); err != nil {
		worker.logger.Error("Worker: Failed to set job progress", mlog.Err(err))
		return false
	}

	return false
}

// checkIsClusterInSync returns true if all nodes in the cluster are running the same schema
// version, logging a warning on the first mismatch found.
func (worker *BatchMigrationWorker) checkIsClusterInSync(rctx request.CTX) bool {
	clusterStatus, err := worker.app.GetClusterStatus(rctx)
	if err != nil {
		worker.logger.Error("Worker: Failed to get cluster status", mlog.Err(err))
		return false
	}

	for i := 1; i < len(clusterStatus); i++ {
		if clusterStatus[i].SchemaVersion != clusterStatus[0].SchemaVersion {
			rctx.Logger().Warn(
				"Worker: cluster not in sync",
				mlog.String("schema_version_a", clusterStatus[0].SchemaVersion),
				mlog.String("schema_version_b", clusterStatus[i].SchemaVersion),
				mlog.String("server_ip_a", clusterStatus[0].IPAddress),
				mlog.String("server_ip_b", clusterStatus[i].IPAddress),
			)
			return false
		}
	}

	return true
}

// markAsComplete records a discrete migration key to prevent this job from ever running again.
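//
// Callers typically consult the same key before scheduling the migration job again. A rough
// sketch, assuming the usual systems-table accessor (the lookup itself does not live in this
// file):
//
//	if _, err := st.System().GetByName(migrationKey); err == nil {
//		return // migration already recorded as complete
//	}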
func (worker *BatchMigrationWorker) markAsComplete() {
	system := model.System{
		Name:  worker.migrationKey,
		Value: "true",
	}

	// Note that if this fails, then the job would have still succeeded. We will spuriously
	// run the job again in the future, but as migrations are idempotent it won't be an issue.
	if err := worker.jobServer.Store.System().Save(&system); err != nil {
		worker.logger.Error("Worker: Failed to mark migration as completed in the systems table.", mlog.String("migration_key", worker.migrationKey), mlog.Err(err))
	}
}