Full Mattermost server source with integrated Community Enterprise features. Includes vendor directory for offline/air-gapped builds. Structure: - enterprise-impl/: Enterprise feature implementations - enterprise-community/: Init files that register implementations - enterprise/: Bridge imports (community_imports.go) - vendor/: All dependencies for offline builds Build (online): go build ./cmd/mattermost Build (offline/air-gapped): go build -mod=vendor ./cmd/mattermost 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
215 lines
5.3 KiB
Go
215 lines
5.3 KiB
Go
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
// See LICENSE.txt for license information.
|
|
|
|
package docextractor
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"io"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/mattermost/mattermost/server/public/shared/mlog"
|
|
"github.com/mattermost/mattermost/server/v8/channels/utils/testutils"
|
|
)
|
|
|
|
func TestExtract(t *testing.T) {
|
|
logger := mlog.CreateConsoleTestLogger(t)
|
|
|
|
testCases := []struct {
|
|
Name string
|
|
TestFileName string
|
|
Settings ExtractSettings
|
|
Contains []string
|
|
NotContains []string
|
|
ExpectError bool
|
|
}{
|
|
{
|
|
"Plain text file",
|
|
"test-markdown-basics.md",
|
|
ExtractSettings{},
|
|
[]string{"followed", "separated", "Basic"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Plain small text file",
|
|
"test-hashtags.md",
|
|
ExtractSettings{},
|
|
[]string{"should", "render", "strings"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Zip file without recursion",
|
|
"Fake_Team_Import.zip",
|
|
ExtractSettings{},
|
|
[]string{"users", "channels", "general"},
|
|
[]string{"purpose", "announcements"},
|
|
false,
|
|
},
|
|
{
|
|
"Zip file with recursion",
|
|
"Fake_Team_Import.zip",
|
|
ExtractSettings{ArchiveRecursion: true},
|
|
[]string{"users", "channels", "general", "purpose", "announcements"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Rar file without recursion",
|
|
"Fake_Team_Import.rar",
|
|
ExtractSettings{},
|
|
[]string{"users", "channels", "general"},
|
|
[]string{"purpose", "announcements"},
|
|
false,
|
|
},
|
|
{
|
|
"Rar file with recursion",
|
|
"Fake_Team_Import.rar",
|
|
ExtractSettings{ArchiveRecursion: true},
|
|
[]string{"users", "channels", "general", "purpose", "announcements"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Tar.gz file without recursion",
|
|
"Fake_Team_Import.tar.gz",
|
|
ExtractSettings{},
|
|
[]string{"users", "channels", "general"},
|
|
[]string{"purpose", "announcements"},
|
|
false,
|
|
},
|
|
{
|
|
"Tar.gz file with recursion",
|
|
"Fake_Team_Import.tar.gz",
|
|
ExtractSettings{ArchiveRecursion: true},
|
|
[]string{"users", "channels", "general", "purpose", "announcements"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Pdf file",
|
|
"sample-doc.pdf",
|
|
ExtractSettings{},
|
|
[]string{"simple", "document", "contains"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Docx file",
|
|
"sample-doc.docx",
|
|
ExtractSettings{},
|
|
[]string{"simple", "document", "contains"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Odt file",
|
|
"sample-doc.odt",
|
|
ExtractSettings{},
|
|
[]string{"simple", "document", "contains"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
{
|
|
"Pptx file",
|
|
"sample-doc.pptx",
|
|
ExtractSettings{},
|
|
[]string{"simple", "document", "contains"},
|
|
[]string{},
|
|
false,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.Name, func(t *testing.T) {
|
|
data, err := testutils.ReadTestFile(tc.TestFileName)
|
|
require.NoError(t, err)
|
|
text, err := Extract(logger, tc.TestFileName, bytes.NewReader(data), tc.Settings)
|
|
if tc.ExpectError {
|
|
require.Error(t, err)
|
|
} else {
|
|
require.NoError(t, err)
|
|
for _, expectedString := range tc.Contains {
|
|
assert.Contains(t, text, expectedString)
|
|
}
|
|
for _, notExpectedString := range tc.NotContains {
|
|
assert.NotContains(t, text, notExpectedString)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
t.Run("Unsupported binary file", func(t *testing.T) {
|
|
data, err := testutils.ReadTestFile("testjpg.jpg")
|
|
require.NoError(t, err)
|
|
text, err := Extract(logger, "testjpg.jpg", bytes.NewReader(data), ExtractSettings{})
|
|
require.NoError(t, err)
|
|
require.Equal(t, "", text)
|
|
})
|
|
|
|
t.Run("Wrong docx extension", func(t *testing.T) {
|
|
data, err := testutils.ReadTestFile("sample-doc.pdf")
|
|
require.NoError(t, err)
|
|
text, err := Extract(logger, "sample-doc.docx", bytes.NewReader(data), ExtractSettings{})
|
|
require.NoError(t, err)
|
|
require.Equal(t, "", text)
|
|
})
|
|
}
|
|
|
|
type customTestPdfExtractor struct{}
|
|
|
|
func (te *customTestPdfExtractor) Name() string {
|
|
return "customTestPdfExtractor"
|
|
}
|
|
|
|
func (te *customTestPdfExtractor) Match(filename string) bool {
|
|
return strings.HasSuffix(filename, ".pdf")
|
|
}
|
|
|
|
func (te *customTestPdfExtractor) Extract(filename string, r io.ReadSeeker) (string, error) {
|
|
return "this is a text generated content", nil
|
|
}
|
|
|
|
type failingExtractor struct{}
|
|
|
|
func (te *failingExtractor) Name() string {
|
|
return "failingExtractor"
|
|
}
|
|
|
|
func (te *failingExtractor) Match(filename string) bool {
|
|
return true
|
|
}
|
|
|
|
func (te *failingExtractor) Extract(filename string, r io.ReadSeeker) (string, error) {
|
|
return "", errors.New("this always fail")
|
|
}
|
|
|
|
func TestExtractWithExtraExtractors(t *testing.T) {
|
|
logger := mlog.CreateConsoleTestLogger(t)
|
|
t.Run("override existing extractor", func(t *testing.T) {
|
|
data, err := testutils.ReadTestFile("sample-doc.pdf")
|
|
require.NoError(t, err)
|
|
|
|
text, err := ExtractWithExtraExtractors(logger, "sample-doc.pdf", bytes.NewReader(data), ExtractSettings{}, []Extractor{&customTestPdfExtractor{}})
|
|
require.NoError(t, err)
|
|
require.Equal(t, text, "this is a text generated content")
|
|
})
|
|
|
|
t.Run("failing extractor", func(t *testing.T) {
|
|
data, err := testutils.ReadTestFile("sample-doc.pdf")
|
|
require.NoError(t, err)
|
|
|
|
text, err := ExtractWithExtraExtractors(logger, "sample-doc.pdf", bytes.NewReader(data), ExtractSettings{}, []Extractor{&failingExtractor{}})
|
|
require.NoError(t, err)
|
|
assert.Contains(t, text, "simple")
|
|
assert.Contains(t, text, "document")
|
|
assert.Contains(t, text, "contains")
|
|
})
|
|
}
|