Full Mattermost server source with integrated Community Enterprise features. Includes vendor directory for offline/air-gapped builds. Structure: - enterprise-impl/: Enterprise feature implementations - enterprise-community/: Init files that register implementations - enterprise/: Bridge imports (community_imports.go) - vendor/: All dependencies for offline builds Build (online): go build ./cmd/mattermost Build (offline/air-gapped): go build -mod=vendor ./cmd/mattermost 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
177 lines
3.4 KiB
Go
177 lines
3.4 KiB
Go
//go:build ocr
|
|
|
|
package docconv
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// exts lists the file extensions (with leading dot) of extracted images that
// ConvertPDFImages hands to ConvertImage. Matching is exact, so entries are
// case-sensitive.
var exts = []string{".jpg", ".tif", ".tiff", ".png", ".pbm"}
|
|
|
|
// compareExt reports whether ext is exactly equal to one of the entries of
// exts. The comparison is case-sensitive; a nil or empty exts never matches.
func compareExt(ext string, exts []string) bool {
	for i := 0; i < len(exts); i++ {
		if exts[i] == ext {
			return true
		}
	}
	return false
}
|
|
|
|
func ConvertPDFImages(path string) (BodyResult, error) {
|
|
bodyResult := BodyResult{}
|
|
|
|
tmp, err := os.MkdirTemp(os.TempDir(), "tmp-imgs-")
|
|
if err != nil {
|
|
bodyResult.err = err
|
|
return bodyResult, err
|
|
}
|
|
tmpDir := fmt.Sprintf("%s/", tmp)
|
|
|
|
defer func() {
|
|
_ = os.RemoveAll(tmpDir) // ignore error
|
|
}()
|
|
|
|
_, err = exec.Command("pdfimages", "-j", path, tmpDir).Output()
|
|
if err != nil {
|
|
return bodyResult, err
|
|
}
|
|
|
|
filePaths := []string{}
|
|
|
|
walkFunc := func(path string, info os.FileInfo, err error) error {
|
|
path, err = filepath.Abs(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if compareExt(filepath.Ext(path), exts) {
|
|
filePaths = append(filePaths, path)
|
|
}
|
|
return nil
|
|
}
|
|
filepath.Walk(tmpDir, walkFunc)
|
|
|
|
fileLength := len(filePaths)
|
|
|
|
if fileLength < 1 {
|
|
return bodyResult, nil
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
data := make(chan string, fileLength)
|
|
|
|
wg.Add(fileLength)
|
|
|
|
for _, p := range filePaths {
|
|
go func(pathFile string) {
|
|
defer wg.Done()
|
|
f, err := os.Open(pathFile)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
defer f.Close()
|
|
out, _, err := ConvertImage(f)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
data <- out
|
|
|
|
}(p)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
close(data)
|
|
|
|
for str := range data {
|
|
bodyResult.body += str + " "
|
|
}
|
|
|
|
return bodyResult, nil
|
|
}
|
|
|
|
// PdfHasImage verify if `path` (PDF) has images
|
|
func PDFHasImage(path string) (bool, error) {
|
|
cmd := "pdffonts -l 5 %s | tail -n +3 | cut -d' ' -f1 | sort | uniq"
|
|
out, err := exec.Command("bash", "-c", fmt.Sprintf(cmd, shellEscape(path))).CombinedOutput()
|
|
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if string(out) == "" {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func ConvertPDF(r io.Reader) (string, map[string]string, error) {
|
|
f, err := NewLocalFile(r)
|
|
if err != nil {
|
|
return "", nil, fmt.Errorf("error creating local file: %v", err)
|
|
}
|
|
defer f.Done()
|
|
|
|
bodyResult, metaResult, textConvertErr := ConvertPDFText(f.Name())
|
|
if textConvertErr != nil {
|
|
return "", nil, textConvertErr
|
|
}
|
|
if bodyResult.err != nil {
|
|
return "", nil, bodyResult.err
|
|
}
|
|
if metaResult.err != nil {
|
|
return "", nil, metaResult.err
|
|
}
|
|
|
|
hasImage, err := PDFHasImage(f.Name())
|
|
if err != nil {
|
|
return "", nil, fmt.Errorf("could not check if PDF has image: %w", err)
|
|
}
|
|
if !hasImage {
|
|
return bodyResult.body, metaResult.meta, nil
|
|
}
|
|
|
|
imageConvertResult, imageConvertErr := ConvertPDFImages(f.Name())
|
|
if imageConvertErr != nil {
|
|
return bodyResult.body, metaResult.meta, nil // ignore error, return what we have
|
|
}
|
|
if imageConvertResult.err != nil {
|
|
return bodyResult.body, metaResult.meta, nil // ignore error, return what we have
|
|
}
|
|
|
|
fullBody := strings.Join([]string{bodyResult.body, imageConvertResult.body}, " ")
|
|
|
|
return fullBody, metaResult.meta, nil
|
|
|
|
}
|
|
|
|
// shellEscapePattern matches any character outside the set that shellEscape
// leaves unquoted: word characters and @ % + = : , . / -
var shellEscapePattern = regexp.MustCompile(`[^\w@%+=:,./-]`)

// shellEscape quotes s so that it can be used as exactly one token on a shell
// command line.
//
// The empty string becomes ''. A string made up only of characters outside
// shellEscapePattern is returned unchanged. Anything else is wrapped in
// single quotes, with each embedded single quote rewritten as '"'"' (close
// the quote, emit a double-quoted ', reopen the quote).
func shellEscape(s string) string {
	switch {
	case s == "":
		return "''"
	case !shellEscapePattern.MatchString(s):
		return s
	}

	quoted := strings.ReplaceAll(s, "'", "'\"'\"'")
	return "'" + quoted + "'"
}
|