Full Mattermost server source with integrated Community Enterprise features. Includes the vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online): go build ./cmd/mattermost
Build (offline/air-gapped): go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
99 lines
2.1 KiB
Go
99 lines
2.1 KiB
Go
package justext
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"github.com/levigross/exp-html"
|
|
"io"
|
|
"io/ioutil"
|
|
"strings"
|
|
)
|
|
|
|
// Reader holds the tuning parameters for paragraph classification together
// with the input source that ReadAll consumes. The exported fields are handed
// unchanged to classifyParagraphs and reviseParagraphClassification.
type Reader struct {
	// LengthLow is the lower length threshold passed to classifyParagraphs.
	// NOTE(review): unit is presumably characters — confirm against classifyParagraphs.
	LengthLow int
	// LengthHigh is the upper length threshold passed to classifyParagraphs.
	LengthHigh int
	// Stoplist is the stop-word set passed to classifyParagraphs. NewReader
	// leaves it nil, so callers are expected to assign it themselves.
	Stoplist map[string]bool
	// StopwordsLow is the lower stop-word threshold passed to classifyParagraphs.
	StopwordsLow float64
	// StopwordsHigh is the upper stop-word threshold passed to classifyParagraphs.
	StopwordsHigh float64
	// MaxLinkDensity is the link-density limit passed to classifyParagraphs.
	MaxLinkDensity float64
	// MaxHeadingDistance is passed to reviseParagraphClassification.
	MaxHeadingDistance int
	// NoHeadings is forwarded to classifyParagraphs as its final flag.
	NoHeadings bool

	// r is the input source that ReadAll reads to completion.
	r io.Reader
}
|
|
|
|
func NewReader(r io.Reader) *Reader {
|
|
return &Reader{
|
|
LengthLow: 70,
|
|
LengthHigh: 200,
|
|
StopwordsLow: 0.30,
|
|
StopwordsHigh: 0.32,
|
|
MaxLinkDensity: 0.2,
|
|
MaxHeadingDistance: 200,
|
|
NoHeadings: false,
|
|
r: r,
|
|
}
|
|
}
|
|
|
|
func (r *Reader) ReadAll() ([]*Paragraph, error) {
|
|
in, err := ioutil.ReadAll(r.r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
root, err := preprocess(string(in), "utf-8", "utf-8", "errors")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if root == nil {
|
|
return nil, errors.New("Preprocess has resulted in nil")
|
|
}
|
|
|
|
htmlSource := nodesToString(root)
|
|
if len(htmlSource) == 0 {
|
|
return nil, errors.New("MAIN: perprocess has returned an empty string")
|
|
}
|
|
|
|
p, err := paragraphObjectModel(htmlSource)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if p == nil {
|
|
return nil, errors.New("MAIN: P is nil")
|
|
}
|
|
|
|
classifyParagraphs(p, r.Stoplist, r.LengthLow, r.LengthHigh, r.StopwordsLow, r.StopwordsHigh, r.MaxLinkDensity, r.NoHeadings)
|
|
reviseParagraphClassification(p, r.MaxHeadingDistance)
|
|
|
|
return p, nil
|
|
}
|
|
|
|
func dumpNodes(n *html.Node, tab int, exploreChildNodes bool) string {
|
|
var childNodes string = ""
|
|
if exploreChildNodes == true {
|
|
if len(n.Child) > 0 {
|
|
for _, c := range n.Child {
|
|
childNodes = fmt.Sprintf("%s%s\n", childNodes, dumpNodes(c, tab+1, true))
|
|
}
|
|
}
|
|
}
|
|
|
|
var t string
|
|
switch n.Type {
|
|
case html.ErrorNode:
|
|
t = "Err"
|
|
case html.TextNode:
|
|
t = "T"
|
|
case html.DocumentNode:
|
|
t = "D"
|
|
case html.ElementNode:
|
|
t = "E"
|
|
case html.CommentNode:
|
|
t = "C"
|
|
case html.DoctypeNode:
|
|
t = "Dt"
|
|
}
|
|
|
|
tabStr := strings.Repeat(" ", tab)
|
|
return fmt.Sprintf("%s%s:%s\n%s", tabStr, t, strings.TrimSpace(strings.Replace(n.Data, "\n", "", -1)), childNodes)
|
|
}
|