// Copyright (c) 2024 Mattermost Community Enterprise // Bleve Search Engine Implementation - Lightweight alternative to Elasticsearch package searchengine import ( "fmt" "net/http" "os" "path/filepath" "strings" "sync" "time" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" "github.com/blevesearch/bleve/v2/analysis/analyzer/standard" "github.com/blevesearch/bleve/v2/analysis/lang/cjk" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/porter" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search/query" "github.com/mattermost/mattermost/server/public/model" "github.com/mattermost/mattermost/server/public/shared/mlog" "github.com/mattermost/mattermost/server/public/shared/request" ) const ( BleveEngineName = "bleve" BleveEngineVersion = 2 PostIndexName = "posts" ChannelIndexName = "channels" UserIndexName = "users" FileIndexName = "files" ) // BleveConfig holds configuration for Bleve search engine type BleveConfig struct { Config func() *model.Config Logger mlog.LoggerIFace } // BleveSettings holds Bleve-specific settings type BleveSettings struct { IndexDir string EnableIndexing bool EnableSearching bool EnableAutocomplete bool BulkIndexingTimeWindowSeconds int } // BleveEngine implements SearchEngineInterface using Bleve type BleveEngine struct { config func() *model.Config logger mlog.LoggerIFace settings *BleveSettings postIndex bleve.Index channelIndex bleve.Index userIndex bleve.Index fileIndex bleve.Index mutex sync.RWMutex started bool } // Indexed document types type PostDocument struct { ID string `json:"id"` TeamID string `json:"team_id"` ChannelID string `json:"channel_id"` UserID string `json:"user_id"` Message string `json:"message"` CreateAt time.Time `json:"create_at"` Type string `json:"type"` Hashtags string `json:"hashtags"` } type ChannelDocument struct { ID string `json:"id"` TeamID string `json:"team_id"` Type string `json:"type"` Name string `json:"name"` DisplayName string `json:"display_name"` Header string `json:"header"` Purpose string `json:"purpose"` UserIDs []string `json:"user_ids"` TeamMemberIDs []string `json:"team_member_ids"` } type UserDocument struct { ID string `json:"id"` Username string `json:"username"` Nickname string `json:"nickname"` FirstName string `json:"first_name"` LastName string `json:"last_name"` Email string `json:"email"` TeamIDs []string `json:"team_ids"` ChannelIDs []string `json:"channel_ids"` DeleteAt int64 `json:"delete_at"` } type FileDocument struct { ID string `json:"id"` ChannelID string `json:"channel_id"` PostID string `json:"post_id"` UserID string `json:"user_id"` Name string `json:"name"` Extension string `json:"extension"` Content string `json:"content"` CreateAt time.Time `json:"create_at"` } // NewBleveEngine creates a new Bleve search engine func NewBleveEngine(cfg *BleveConfig) *BleveEngine { return &BleveEngine{ config: cfg.Config, logger: cfg.Logger, settings: &BleveSettings{ IndexDir: "/var/mattermost/bleve-indexes", EnableIndexing: true, EnableSearching: true, EnableAutocomplete: true, BulkIndexingTimeWindowSeconds: 60, }, } } // Start initializes and starts the Bleve engine func (b *BleveEngine) Start() *model.AppError { b.mutex.Lock() defer b.mutex.Unlock() if b.started { return nil } // Create index directory if not exists if err := os.MkdirAll(b.settings.IndexDir, 0755); err != nil { return model.NewAppError("BleveEngine.Start", "searchengine.bleve.create_dir", nil, err.Error(), http.StatusInternalServerError) } // Open or create indexes var err error b.postIndex, err = b.openOrCreateIndex(PostIndexName, b.createPostMapping()) if err != nil { return model.NewAppError("BleveEngine.Start", "searchengine.bleve.open_post_index", nil, err.Error(), http.StatusInternalServerError) } b.channelIndex, err = b.openOrCreateIndex(ChannelIndexName, b.createChannelMapping()) if err != nil { return model.NewAppError("BleveEngine.Start", "searchengine.bleve.open_channel_index", nil, err.Error(), http.StatusInternalServerError) } b.userIndex, err = b.openOrCreateIndex(UserIndexName, b.createUserMapping()) if err != nil { return model.NewAppError("BleveEngine.Start", "searchengine.bleve.open_user_index", nil, err.Error(), http.StatusInternalServerError) } b.fileIndex, err = b.openOrCreateIndex(FileIndexName, b.createFileMapping()) if err != nil { return model.NewAppError("BleveEngine.Start", "searchengine.bleve.open_file_index", nil, err.Error(), http.StatusInternalServerError) } b.started = true b.logger.Info("Bleve search engine started", mlog.String("index_dir", b.settings.IndexDir), ) return nil } // Stop closes all indexes func (b *BleveEngine) Stop() *model.AppError { b.mutex.Lock() defer b.mutex.Unlock() if !b.started { return nil } if b.postIndex != nil { b.postIndex.Close() } if b.channelIndex != nil { b.channelIndex.Close() } if b.userIndex != nil { b.userIndex.Close() } if b.fileIndex != nil { b.fileIndex.Close() } b.started = false b.logger.Info("Bleve search engine stopped") return nil } func (b *BleveEngine) GetFullVersion() string { return fmt.Sprintf("%s.%d", BleveEngineName, BleveEngineVersion) } func (b *BleveEngine) GetVersion() int { return BleveEngineVersion } func (b *BleveEngine) GetPlugins() []string { return []string{} } func (b *BleveEngine) UpdateConfig(cfg *model.Config) { // Update settings from config if needed } func (b *BleveEngine) GetName() string { return BleveEngineName } func (b *BleveEngine) IsEnabled() bool { return b.settings.EnableIndexing || b.settings.EnableSearching } func (b *BleveEngine) IsActive() bool { return b.started } func (b *BleveEngine) IsIndexingEnabled() bool { return b.settings.EnableIndexing } func (b *BleveEngine) IsSearchEnabled() bool { return b.settings.EnableSearching } func (b *BleveEngine) IsAutocompletionEnabled() bool { return b.settings.EnableAutocomplete } func (b *BleveEngine) IsIndexingSync() bool { return true // Bleve indexes synchronously } // Post operations func (b *BleveEngine) IndexPost(post *model.Post, teamId string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.postIndex == nil { return model.NewAppError("BleveEngine.IndexPost", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } doc := PostDocument{ ID: post.Id, TeamID: teamId, ChannelID: post.ChannelId, UserID: post.UserId, Message: post.Message, CreateAt: time.Unix(0, post.CreateAt*int64(time.Millisecond)), Type: post.Type, Hashtags: post.Hashtags, } if err := b.postIndex.Index(post.Id, doc); err != nil { return model.NewAppError("BleveEngine.IndexPost", "searchengine.bleve.index_post", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) SearchPosts(channels model.ChannelList, searchParams []*model.SearchParams, page, perPage int) ([]string, model.PostSearchMatches, *model.AppError) { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.postIndex == nil { return nil, nil, model.NewAppError("BleveEngine.SearchPosts", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } // Build channel ID filter channelIDs := make([]string, len(channels)) for i, ch := range channels { channelIDs[i] = ch.Id } // Build query from search params queries := []query.Query{} for _, params := range searchParams { // Main terms query if params.Terms != "" { terms := strings.Fields(params.Terms) for _, term := range terms { if params.IsHashtag { // Hashtag search q := bleve.NewTermQuery(term) q.SetField("hashtags") queries = append(queries, q) } else { // Full text search q := bleve.NewMatchQuery(term) q.SetField("message") queries = append(queries, q) } } } // Channel filter if len(params.InChannels) > 0 { for _, ch := range params.InChannels { q := bleve.NewTermQuery(ch) q.SetField("channel_id") queries = append(queries, q) } } // User filter if len(params.FromUsers) > 0 { for _, user := range params.FromUsers { q := bleve.NewTermQuery(user) q.SetField("user_id") queries = append(queries, q) } } } // Must be in allowed channels channelQueries := make([]query.Query, len(channelIDs)) for i, chID := range channelIDs { q := bleve.NewTermQuery(chID) q.SetField("channel_id") channelQueries[i] = q } channelFilter := bleve.NewDisjunctionQuery(channelQueries...) // Combine all queries var finalQuery query.Query if len(queries) > 0 { mainQuery := bleve.NewConjunctionQuery(queries...) finalQuery = bleve.NewConjunctionQuery(mainQuery, channelFilter) } else { finalQuery = channelFilter } // Execute search searchRequest := bleve.NewSearchRequestOptions(finalQuery, perPage, page*perPage, false) searchRequest.SortBy([]string{"-create_at"}) result, err := b.postIndex.Search(searchRequest) if err != nil { return nil, nil, model.NewAppError("BleveEngine.SearchPosts", "searchengine.bleve.search_posts", nil, err.Error(), http.StatusInternalServerError) } // Extract post IDs postIDs := make([]string, len(result.Hits)) matches := make(model.PostSearchMatches) for i, hit := range result.Hits { postIDs[i] = hit.ID // Add match info if available if len(hit.Fragments) > 0 { matches[hit.ID] = hit.Fragments["message"] } } return postIDs, matches, nil } func (b *BleveEngine) DeletePost(post *model.Post) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.postIndex == nil { return nil } if err := b.postIndex.Delete(post.Id); err != nil { return model.NewAppError("BleveEngine.DeletePost", "searchengine.bleve.delete_post", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) DeleteChannelPosts(rctx request.CTX, channelID string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.postIndex == nil { return nil } // Find all posts in channel q := bleve.NewTermQuery(channelID) q.SetField("channel_id") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 10000 result, err := b.postIndex.Search(searchRequest) if err != nil { return model.NewAppError("BleveEngine.DeleteChannelPosts", "searchengine.bleve.search_channel_posts", nil, err.Error(), http.StatusInternalServerError) } // Delete each post batch := b.postIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } if err := b.postIndex.Batch(batch); err != nil { return model.NewAppError("BleveEngine.DeleteChannelPosts", "searchengine.bleve.delete_channel_posts", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) DeleteUserPosts(rctx request.CTX, userID string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.postIndex == nil { return nil } // Find all posts by user q := bleve.NewTermQuery(userID) q.SetField("user_id") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 10000 result, err := b.postIndex.Search(searchRequest) if err != nil { return model.NewAppError("BleveEngine.DeleteUserPosts", "searchengine.bleve.search_user_posts", nil, err.Error(), http.StatusInternalServerError) } // Delete each post batch := b.postIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } if err := b.postIndex.Batch(batch); err != nil { return model.NewAppError("BleveEngine.DeleteUserPosts", "searchengine.bleve.delete_user_posts", nil, err.Error(), http.StatusInternalServerError) } return nil } // Channel operations func (b *BleveEngine) IndexChannel(rctx request.CTX, channel *model.Channel, userIDs, teamMemberIDs []string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.channelIndex == nil { return nil } doc := ChannelDocument{ ID: channel.Id, TeamID: channel.TeamId, Type: string(channel.Type), Name: channel.Name, DisplayName: channel.DisplayName, Header: channel.Header, Purpose: channel.Purpose, UserIDs: userIDs, TeamMemberIDs: teamMemberIDs, } if err := b.channelIndex.Index(channel.Id, doc); err != nil { return model.NewAppError("BleveEngine.IndexChannel", "searchengine.bleve.index_channel", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) SyncBulkIndexChannels(rctx request.CTX, channels []*model.Channel, getUserIDsForChannel func(channel *model.Channel) ([]string, error), teamMemberIDs []string) *model.AppError { for _, channel := range channels { userIDs, _ := getUserIDsForChannel(channel) if err := b.IndexChannel(rctx, channel, userIDs, teamMemberIDs); err != nil { return err } } return nil } func (b *BleveEngine) SearchChannels(teamId, userID, term string, isGuest, includeDeleted bool) ([]string, *model.AppError) { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.channelIndex == nil { return nil, model.NewAppError("BleveEngine.SearchChannels", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } queries := []query.Query{} // Team filter if teamId != "" { q := bleve.NewTermQuery(teamId) q.SetField("team_id") queries = append(queries, q) } // Search term if term != "" { // Search in name and display_name nameQuery := bleve.NewMatchQuery(term) nameQuery.SetField("name") displayQuery := bleve.NewMatchQuery(term) displayQuery.SetField("display_name") termQuery := bleve.NewDisjunctionQuery(nameQuery, displayQuery) queries = append(queries, termQuery) } // For guests, filter by user_ids containing the user if isGuest && userID != "" { q := bleve.NewTermQuery(userID) q.SetField("user_ids") queries = append(queries, q) } var finalQuery query.Query if len(queries) > 0 { finalQuery = bleve.NewConjunctionQuery(queries...) } else { finalQuery = bleve.NewMatchAllQuery() } searchRequest := bleve.NewSearchRequest(finalQuery) searchRequest.Size = 100 result, err := b.channelIndex.Search(searchRequest) if err != nil { return nil, model.NewAppError("BleveEngine.SearchChannels", "searchengine.bleve.search_channels", nil, err.Error(), http.StatusInternalServerError) } channelIDs := make([]string, len(result.Hits)) for i, hit := range result.Hits { channelIDs[i] = hit.ID } return channelIDs, nil } func (b *BleveEngine) DeleteChannel(channel *model.Channel) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.channelIndex == nil { return nil } if err := b.channelIndex.Delete(channel.Id); err != nil { return model.NewAppError("BleveEngine.DeleteChannel", "searchengine.bleve.delete_channel", nil, err.Error(), http.StatusInternalServerError) } return nil } // User operations func (b *BleveEngine) IndexUser(rctx request.CTX, user *model.User, teamsIds, channelsIds []string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.userIndex == nil { return nil } doc := UserDocument{ ID: user.Id, Username: user.Username, Nickname: user.Nickname, FirstName: user.FirstName, LastName: user.LastName, Email: user.Email, TeamIDs: teamsIds, ChannelIDs: channelsIds, DeleteAt: user.DeleteAt, } if err := b.userIndex.Index(user.Id, doc); err != nil { return model.NewAppError("BleveEngine.IndexUser", "searchengine.bleve.index_user", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) SearchUsersInChannel(teamId, channelId string, restrictedToChannels []string, term string, options *model.UserSearchOptions) ([]string, []string, *model.AppError) { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.userIndex == nil { return nil, nil, model.NewAppError("BleveEngine.SearchUsersInChannel", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } queries := []query.Query{} // Channel filter if channelId != "" { q := bleve.NewTermQuery(channelId) q.SetField("channel_ids") queries = append(queries, q) } // Search term if term != "" { termQueries := []query.Query{} usernameQuery := bleve.NewPrefixQuery(strings.ToLower(term)) usernameQuery.SetField("username") termQueries = append(termQueries, usernameQuery) nicknameQuery := bleve.NewMatchQuery(term) nicknameQuery.SetField("nickname") termQueries = append(termQueries, nicknameQuery) firstQuery := bleve.NewMatchQuery(term) firstQuery.SetField("first_name") termQueries = append(termQueries, firstQuery) lastQuery := bleve.NewMatchQuery(term) lastQuery.SetField("last_name") termQueries = append(termQueries, lastQuery) queries = append(queries, bleve.NewDisjunctionQuery(termQueries...)) } // Exclude deleted users if !options.AllowInactive { q := bleve.NewNumericRangeQuery(nil, floatPtr(0)) q.SetField("delete_at") queries = append(queries, q) } var finalQuery query.Query if len(queries) > 0 { finalQuery = bleve.NewConjunctionQuery(queries...) } else { finalQuery = bleve.NewMatchAllQuery() } searchRequest := bleve.NewSearchRequest(finalQuery) searchRequest.Size = options.Limit result, err := b.userIndex.Search(searchRequest) if err != nil { return nil, nil, model.NewAppError("BleveEngine.SearchUsersInChannel", "searchengine.bleve.search_users", nil, err.Error(), http.StatusInternalServerError) } userIDs := make([]string, len(result.Hits)) for i, hit := range result.Hits { userIDs[i] = hit.ID } // Return same list for both in-channel and out-of-channel for simplicity return userIDs, nil, nil } func (b *BleveEngine) SearchUsersInTeam(teamId string, restrictedToChannels []string, term string, options *model.UserSearchOptions) ([]string, *model.AppError) { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.userIndex == nil { return nil, model.NewAppError("BleveEngine.SearchUsersInTeam", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } queries := []query.Query{} // Team filter if teamId != "" { q := bleve.NewTermQuery(teamId) q.SetField("team_ids") queries = append(queries, q) } // Search term if term != "" { termQueries := []query.Query{} usernameQuery := bleve.NewPrefixQuery(strings.ToLower(term)) usernameQuery.SetField("username") termQueries = append(termQueries, usernameQuery) nicknameQuery := bleve.NewMatchQuery(term) nicknameQuery.SetField("nickname") termQueries = append(termQueries, nicknameQuery) firstQuery := bleve.NewMatchQuery(term) firstQuery.SetField("first_name") termQueries = append(termQueries, firstQuery) lastQuery := bleve.NewMatchQuery(term) lastQuery.SetField("last_name") termQueries = append(termQueries, lastQuery) queries = append(queries, bleve.NewDisjunctionQuery(termQueries...)) } var finalQuery query.Query if len(queries) > 0 { finalQuery = bleve.NewConjunctionQuery(queries...) } else { finalQuery = bleve.NewMatchAllQuery() } searchRequest := bleve.NewSearchRequest(finalQuery) searchRequest.Size = options.Limit result, err := b.userIndex.Search(searchRequest) if err != nil { return nil, model.NewAppError("BleveEngine.SearchUsersInTeam", "searchengine.bleve.search_users", nil, err.Error(), http.StatusInternalServerError) } userIDs := make([]string, len(result.Hits)) for i, hit := range result.Hits { userIDs[i] = hit.ID } return userIDs, nil } func (b *BleveEngine) DeleteUser(user *model.User) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.userIndex == nil { return nil } if err := b.userIndex.Delete(user.Id); err != nil { return model.NewAppError("BleveEngine.DeleteUser", "searchengine.bleve.delete_user", nil, err.Error(), http.StatusInternalServerError) } return nil } // File operations func (b *BleveEngine) IndexFile(file *model.FileInfo, channelId string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil } doc := FileDocument{ ID: file.Id, ChannelID: channelId, PostID: file.PostId, UserID: file.CreatorId, Name: file.Name, Extension: file.Extension, Content: file.Content, CreateAt: time.Unix(0, file.CreateAt*int64(time.Millisecond)), } if err := b.fileIndex.Index(file.Id, doc); err != nil { return model.NewAppError("BleveEngine.IndexFile", "searchengine.bleve.index_file", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) SearchFiles(channels model.ChannelList, searchParams []*model.SearchParams, page, perPage int) ([]string, *model.AppError) { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil, model.NewAppError("BleveEngine.SearchFiles", "searchengine.bleve.not_started", nil, "", http.StatusServiceUnavailable) } // Build channel ID filter channelIDs := make([]string, len(channels)) for i, ch := range channels { channelIDs[i] = ch.Id } queries := []query.Query{} for _, params := range searchParams { if params.Terms != "" { terms := strings.Fields(params.Terms) for _, term := range terms { // Search in name and content nameQuery := bleve.NewMatchQuery(term) nameQuery.SetField("name") contentQuery := bleve.NewMatchQuery(term) contentQuery.SetField("content") queries = append(queries, bleve.NewDisjunctionQuery(nameQuery, contentQuery)) } } // Extension filter if len(params.Extensions) > 0 { extQueries := make([]query.Query, len(params.Extensions)) for i, ext := range params.Extensions { q := bleve.NewTermQuery(ext) q.SetField("extension") extQueries[i] = q } queries = append(queries, bleve.NewDisjunctionQuery(extQueries...)) } } // Channel filter channelQueries := make([]query.Query, len(channelIDs)) for i, chID := range channelIDs { q := bleve.NewTermQuery(chID) q.SetField("channel_id") channelQueries[i] = q } channelFilter := bleve.NewDisjunctionQuery(channelQueries...) var finalQuery query.Query if len(queries) > 0 { mainQuery := bleve.NewConjunctionQuery(queries...) finalQuery = bleve.NewConjunctionQuery(mainQuery, channelFilter) } else { finalQuery = channelFilter } searchRequest := bleve.NewSearchRequestOptions(finalQuery, perPage, page*perPage, false) searchRequest.SortBy([]string{"-create_at"}) result, err := b.fileIndex.Search(searchRequest) if err != nil { return nil, model.NewAppError("BleveEngine.SearchFiles", "searchengine.bleve.search_files", nil, err.Error(), http.StatusInternalServerError) } fileIDs := make([]string, len(result.Hits)) for i, hit := range result.Hits { fileIDs[i] = hit.ID } return fileIDs, nil } func (b *BleveEngine) DeleteFile(fileID string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil } if err := b.fileIndex.Delete(fileID); err != nil { return model.NewAppError("BleveEngine.DeleteFile", "searchengine.bleve.delete_file", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) DeletePostFiles(rctx request.CTX, postID string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil } q := bleve.NewTermQuery(postID) q.SetField("post_id") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 1000 result, err := b.fileIndex.Search(searchRequest) if err != nil { return model.NewAppError("BleveEngine.DeletePostFiles", "searchengine.bleve.search_post_files", nil, err.Error(), http.StatusInternalServerError) } batch := b.fileIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } if err := b.fileIndex.Batch(batch); err != nil { return model.NewAppError("BleveEngine.DeletePostFiles", "searchengine.bleve.delete_post_files", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) DeleteUserFiles(rctx request.CTX, userID string) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil } q := bleve.NewTermQuery(userID) q.SetField("user_id") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 10000 result, err := b.fileIndex.Search(searchRequest) if err != nil { return model.NewAppError("BleveEngine.DeleteUserFiles", "searchengine.bleve.search_user_files", nil, err.Error(), http.StatusInternalServerError) } batch := b.fileIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } if err := b.fileIndex.Batch(batch); err != nil { return model.NewAppError("BleveEngine.DeleteUserFiles", "searchengine.bleve.delete_user_files", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) DeleteFilesBatch(rctx request.CTX, endTime, limit int64) *model.AppError { b.mutex.RLock() defer b.mutex.RUnlock() if !b.started || b.fileIndex == nil { return nil } endTimeFloat := float64(endTime) q := bleve.NewNumericRangeQuery(nil, &endTimeFloat) q.SetField("create_at") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = int(limit) result, err := b.fileIndex.Search(searchRequest) if err != nil { return model.NewAppError("BleveEngine.DeleteFilesBatch", "searchengine.bleve.search_files_batch", nil, err.Error(), http.StatusInternalServerError) } batch := b.fileIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } if err := b.fileIndex.Batch(batch); err != nil { return model.NewAppError("BleveEngine.DeleteFilesBatch", "searchengine.bleve.delete_files_batch", nil, err.Error(), http.StatusInternalServerError) } return nil } // Admin operations func (b *BleveEngine) TestConfig(rctx request.CTX, cfg *model.Config) *model.AppError { // Test if we can create/access the index directory testDir := b.settings.IndexDir if err := os.MkdirAll(testDir, 0755); err != nil { return model.NewAppError("BleveEngine.TestConfig", "searchengine.bleve.test_config", nil, err.Error(), http.StatusInternalServerError) } return nil } func (b *BleveEngine) PurgeIndexes(rctx request.CTX) *model.AppError { b.mutex.Lock() defer b.mutex.Unlock() // Close existing indexes if b.postIndex != nil { b.postIndex.Close() } if b.channelIndex != nil { b.channelIndex.Close() } if b.userIndex != nil { b.userIndex.Close() } if b.fileIndex != nil { b.fileIndex.Close() } // Remove index directories indexes := []string{PostIndexName, ChannelIndexName, UserIndexName, FileIndexName} for _, idx := range indexes { indexPath := filepath.Join(b.settings.IndexDir, idx+".bleve") if err := os.RemoveAll(indexPath); err != nil { b.logger.Warn("Failed to remove index directory", mlog.String("index", idx), mlog.Err(err), ) } } // Recreate indexes var err error b.postIndex, err = b.openOrCreateIndex(PostIndexName, b.createPostMapping()) if err != nil { return model.NewAppError("BleveEngine.PurgeIndexes", "searchengine.bleve.recreate_post_index", nil, err.Error(), http.StatusInternalServerError) } b.channelIndex, err = b.openOrCreateIndex(ChannelIndexName, b.createChannelMapping()) if err != nil { return model.NewAppError("BleveEngine.PurgeIndexes", "searchengine.bleve.recreate_channel_index", nil, err.Error(), http.StatusInternalServerError) } b.userIndex, err = b.openOrCreateIndex(UserIndexName, b.createUserMapping()) if err != nil { return model.NewAppError("BleveEngine.PurgeIndexes", "searchengine.bleve.recreate_user_index", nil, err.Error(), http.StatusInternalServerError) } b.fileIndex, err = b.openOrCreateIndex(FileIndexName, b.createFileMapping()) if err != nil { return model.NewAppError("BleveEngine.PurgeIndexes", "searchengine.bleve.recreate_file_index", nil, err.Error(), http.StatusInternalServerError) } b.logger.Info("Purged all Bleve indexes") return nil } func (b *BleveEngine) PurgeIndexList(rctx request.CTX, indexes []string) *model.AppError { // For Bleve, we just purge all indexes return b.PurgeIndexes(rctx) } func (b *BleveEngine) RefreshIndexes(rctx request.CTX) *model.AppError { // Bleve doesn't need explicit refresh like Elasticsearch return nil } func (b *BleveEngine) DataRetentionDeleteIndexes(rctx request.CTX, cutoff time.Time) *model.AppError { // Delete posts before cutoff cutoffMs := cutoff.UnixMilli() cutoffFloat := float64(cutoffMs) b.mutex.RLock() defer b.mutex.RUnlock() if b.postIndex != nil { q := bleve.NewNumericRangeQuery(nil, &cutoffFloat) q.SetField("create_at") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 10000 result, err := b.postIndex.Search(searchRequest) if err == nil { batch := b.postIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } b.postIndex.Batch(batch) } } if b.fileIndex != nil { q := bleve.NewNumericRangeQuery(nil, &cutoffFloat) q.SetField("create_at") searchRequest := bleve.NewSearchRequest(q) searchRequest.Size = 10000 result, err := b.fileIndex.Search(searchRequest) if err == nil { batch := b.fileIndex.NewBatch() for _, hit := range result.Hits { batch.Delete(hit.ID) } b.fileIndex.Batch(batch) } } b.logger.Info("Data retention delete completed", mlog.Time("cutoff", cutoff), ) return nil } // Helper functions func (b *BleveEngine) openOrCreateIndex(name string, indexMapping mapping.IndexMapping) (bleve.Index, error) { indexPath := filepath.Join(b.settings.IndexDir, name+".bleve") // Try to open existing index index, err := bleve.Open(indexPath) if err == nil { return index, nil } // Create new index return bleve.New(indexPath, indexMapping) } func (b *BleveEngine) createPostMapping() mapping.IndexMapping { // Create custom analyzer for multilingual support indexMapping := bleve.NewIndexMapping() // Register CJK analyzer err := indexMapping.AddCustomAnalyzer("multilingual", map[string]interface{}{ "type": custom.Name, "tokenizer": unicode.Name, "token_filters": []string{ cjk.BigramName, lowercase.Name, porter.Name, }, }) if err != nil { b.logger.Warn("Failed to add multilingual analyzer", mlog.Err(err)) } postMapping := bleve.NewDocumentMapping() // ID field - keyword idField := bleve.NewTextFieldMapping() idField.Analyzer = keyword.Name postMapping.AddFieldMappingsAt("id", idField) // Team ID - keyword teamField := bleve.NewTextFieldMapping() teamField.Analyzer = keyword.Name postMapping.AddFieldMappingsAt("team_id", teamField) // Channel ID - keyword channelField := bleve.NewTextFieldMapping() channelField.Analyzer = keyword.Name postMapping.AddFieldMappingsAt("channel_id", channelField) // User ID - keyword userField := bleve.NewTextFieldMapping() userField.Analyzer = keyword.Name postMapping.AddFieldMappingsAt("user_id", userField) // Message - full text with multilingual support messageField := bleve.NewTextFieldMapping() messageField.Analyzer = standard.Name messageField.Store = true messageField.IncludeTermVectors = true postMapping.AddFieldMappingsAt("message", messageField) // Hashtags hashtagField := bleve.NewTextFieldMapping() hashtagField.Analyzer = keyword.Name postMapping.AddFieldMappingsAt("hashtags", hashtagField) // Create time - date createField := bleve.NewDateTimeFieldMapping() postMapping.AddFieldMappingsAt("create_at", createField) indexMapping.AddDocumentMapping("post", postMapping) indexMapping.DefaultMapping = postMapping return indexMapping } func (b *BleveEngine) createChannelMapping() mapping.IndexMapping { indexMapping := bleve.NewIndexMapping() channelMapping := bleve.NewDocumentMapping() // ID field idField := bleve.NewTextFieldMapping() idField.Analyzer = keyword.Name channelMapping.AddFieldMappingsAt("id", idField) // Team ID teamField := bleve.NewTextFieldMapping() teamField.Analyzer = keyword.Name channelMapping.AddFieldMappingsAt("team_id", teamField) // Type typeField := bleve.NewTextFieldMapping() typeField.Analyzer = keyword.Name channelMapping.AddFieldMappingsAt("type", typeField) // Name - text search nameField := bleve.NewTextFieldMapping() nameField.Analyzer = standard.Name channelMapping.AddFieldMappingsAt("name", nameField) // Display name - text search displayField := bleve.NewTextFieldMapping() displayField.Analyzer = standard.Name channelMapping.AddFieldMappingsAt("display_name", displayField) // User IDs - for private channel access userIDsField := bleve.NewTextFieldMapping() userIDsField.Analyzer = keyword.Name channelMapping.AddFieldMappingsAt("user_ids", userIDsField) indexMapping.AddDocumentMapping("channel", channelMapping) indexMapping.DefaultMapping = channelMapping return indexMapping } func (b *BleveEngine) createUserMapping() mapping.IndexMapping { indexMapping := bleve.NewIndexMapping() userMapping := bleve.NewDocumentMapping() // ID idField := bleve.NewTextFieldMapping() idField.Analyzer = keyword.Name userMapping.AddFieldMappingsAt("id", idField) // Username - both keyword and prefix search usernameField := bleve.NewTextFieldMapping() usernameField.Analyzer = standard.Name userMapping.AddFieldMappingsAt("username", usernameField) // Nickname nicknameField := bleve.NewTextFieldMapping() nicknameField.Analyzer = standard.Name userMapping.AddFieldMappingsAt("nickname", nicknameField) // First name firstField := bleve.NewTextFieldMapping() firstField.Analyzer = standard.Name userMapping.AddFieldMappingsAt("first_name", firstField) // Last name lastField := bleve.NewTextFieldMapping() lastField.Analyzer = standard.Name userMapping.AddFieldMappingsAt("last_name", lastField) // Team IDs teamField := bleve.NewTextFieldMapping() teamField.Analyzer = keyword.Name userMapping.AddFieldMappingsAt("team_ids", teamField) // Channel IDs channelField := bleve.NewTextFieldMapping() channelField.Analyzer = keyword.Name userMapping.AddFieldMappingsAt("channel_ids", channelField) // Delete at - numeric for filtering active users deleteField := bleve.NewNumericFieldMapping() userMapping.AddFieldMappingsAt("delete_at", deleteField) indexMapping.AddDocumentMapping("user", userMapping) indexMapping.DefaultMapping = userMapping return indexMapping } func (b *BleveEngine) createFileMapping() mapping.IndexMapping { indexMapping := bleve.NewIndexMapping() fileMapping := bleve.NewDocumentMapping() // ID idField := bleve.NewTextFieldMapping() idField.Analyzer = keyword.Name fileMapping.AddFieldMappingsAt("id", idField) // Channel ID channelField := bleve.NewTextFieldMapping() channelField.Analyzer = keyword.Name fileMapping.AddFieldMappingsAt("channel_id", channelField) // Post ID postField := bleve.NewTextFieldMapping() postField.Analyzer = keyword.Name fileMapping.AddFieldMappingsAt("post_id", postField) // User ID userField := bleve.NewTextFieldMapping() userField.Analyzer = keyword.Name fileMapping.AddFieldMappingsAt("user_id", userField) // Name - text search nameField := bleve.NewTextFieldMapping() nameField.Analyzer = standard.Name fileMapping.AddFieldMappingsAt("name", nameField) // Extension extField := bleve.NewTextFieldMapping() extField.Analyzer = keyword.Name fileMapping.AddFieldMappingsAt("extension", extField) // Content - full text contentField := bleve.NewTextFieldMapping() contentField.Analyzer = standard.Name fileMapping.AddFieldMappingsAt("content", contentField) // Create time createField := bleve.NewDateTimeFieldMapping() fileMapping.AddFieldMappingsAt("create_at", createField) indexMapping.AddDocumentMapping("file", fileMapping) indexMapping.DefaultMapping = fileMapping return indexMapping } func floatPtr(f float64) *float64 { return &f }