Full Mattermost server source with integrated Community Enterprise features. Includes vendor directory for offline/air-gapped builds. Structure: - enterprise-impl/: Enterprise feature implementations - enterprise-community/: Init files that register implementations - enterprise/: Bridge imports (community_imports.go) - vendor/: All dependencies for offline builds Build (online): go build ./cmd/mattermost Build (offline/air-gapped): go build -mod=vendor ./cmd/mattermost 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
329 lines
6.4 KiB
Go
329 lines
6.4 KiB
Go
package goose
|
|
|
|
import (
|
|
"github.com/PuerkitoBio/goquery"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// candidate is one <img> found on a page that may serve as the article's
// main image.
type candidate struct {
	// url is the image source exactly as it appeared in the markup.
	url string
	// surface is derived from the tag's width/height attributes; score is
	// the heuristic rating produced by score().
	surface, score int
}

// GetUrl returns the image URL recorded for this candidate.
func (c *candidate) GetUrl() string { return c.url }
|
|
|
|
var (
	// largebig matches URL fragments that hint at a full-size image. It is
	// used both as a positive rule in rules and by OpenGraphResolver.
	largebig = regexp.MustCompile("(large|big|full)")

	// classRules maps patterns tested against an <img> class attribute to
	// the score adjustment applied when the pattern matches.
	classRules = map[*regexp.Regexp]int{
		regexp.MustCompile("(promo|ads|banner)"): -1,
	}

	// rules maps patterns tested against an image source URL to the score
	// adjustment applied when the pattern matches. Every matching rule
	// contributes, so adjustments accumulate.
	//
	// NOTE(review): the dots in these patterns are unescaped and therefore
	// match any character; kept as-is to preserve the existing heuristics.
	rules = map[*regexp.Regexp]int{
		largebig:                                    1,
		regexp.MustCompile("upload"):                1,
		regexp.MustCompile("media"):                 1,
		regexp.MustCompile("gravatar.com"):          -1,
		regexp.MustCompile("feeds.feedburner.com"):  -1,
		regexp.MustCompile("(?i)icon"):              -1,
		regexp.MustCompile("(?i)logo"):              -1,
		regexp.MustCompile("(?i)spinner"):           -1,
		regexp.MustCompile("(?i)loading"):           -1,
		regexp.MustCompile("(?i)ads"):               -1,
		regexp.MustCompile("badge"):                 -1,
		regexp.MustCompile("1x1"):                   -1,
		regexp.MustCompile("pixel"):                 -1,
		regexp.MustCompile("thumbnail[s]*"):         -1,
		// One alternation covering file types, share buttons and ad hosts.
		regexp.MustCompile(strings.Join([]string{
			".html",
			".gif",
			".ico",
			"button",
			"twitter.jpg",
			"facebook.jpg",
			"ap_buy_photo",
			"digg.jpg",
			"digg.png",
			"delicious.png",
			"facebook.png",
			"reddit.jpg",
			"doubleclick",
			"diggthis",
			"diggThis",
			"adserver",
			"/(ads|promos|banners)/",
			"ec.atdmt.com",
			"mediaplex.com",
			"adsatt",
			"view.atdmt",
		}, "|")): -1,
	}
)
|
|
|
|
func getImageSrc(tag *goquery.Selection) string {
|
|
src, _ := tag.Attr("src")
|
|
// skip inline images
|
|
if strings.Contains(src, "data:image/") {
|
|
src = ""
|
|
}
|
|
if src == "" {
|
|
src, _ = tag.Attr("data-src")
|
|
}
|
|
if src == "" {
|
|
src, _ = tag.Attr("data-lazy-src")
|
|
}
|
|
return src
|
|
}
|
|
|
|
func score(tag *goquery.Selection) int {
|
|
src := getImageSrc(tag)
|
|
if src == "" {
|
|
return -1
|
|
}
|
|
tagScore := 0
|
|
for rule, score := range rules {
|
|
if rule.MatchString(src) {
|
|
tagScore += score
|
|
}
|
|
}
|
|
|
|
alt, exists := tag.Attr("alt")
|
|
if exists {
|
|
if strings.Contains(alt, "thumbnail") {
|
|
tagScore--
|
|
}
|
|
}
|
|
|
|
id, exists := tag.Attr("id")
|
|
if exists {
|
|
if id == "fbPhotoImage" {
|
|
tagScore++
|
|
}
|
|
}
|
|
|
|
class, exists := tag.Attr("class")
|
|
if exists {
|
|
for rule, score := range classRules {
|
|
if rule.MatchString(class) {
|
|
tagScore += score
|
|
}
|
|
}
|
|
}
|
|
|
|
return tagScore
|
|
}
|
|
|
|
// WebPageImageResolver fetches all candidate images from the HTML page
|
|
func WebPageImageResolver(doc *goquery.Document) ([]candidate, int) {
|
|
imgs := doc.Find("img")
|
|
|
|
var candidates []candidate
|
|
significantSurface := 320 * 200
|
|
significantSurfaceCount := 0
|
|
src := ""
|
|
imgs.Each(func(i int, tag *goquery.Selection) {
|
|
var surface int
|
|
src = getImageSrc(tag)
|
|
if src == "" {
|
|
return
|
|
}
|
|
|
|
width, _ := tag.Attr("width")
|
|
height, _ := tag.Attr("height")
|
|
if width != "" {
|
|
w, _ := strconv.Atoi(width)
|
|
if height != "" {
|
|
h, _ := strconv.Atoi(height)
|
|
surface = w * h
|
|
} else {
|
|
surface = w
|
|
}
|
|
} else {
|
|
if height != "" {
|
|
surface, _ = strconv.Atoi(height)
|
|
} else {
|
|
surface = 0
|
|
}
|
|
}
|
|
|
|
if surface > significantSurface {
|
|
significantSurfaceCount++
|
|
}
|
|
|
|
tagscore := score(tag)
|
|
if tagscore >= 0 {
|
|
c := candidate{
|
|
url: src,
|
|
surface: surface,
|
|
score: score(tag),
|
|
}
|
|
candidates = append(candidates, c)
|
|
}
|
|
})
|
|
|
|
if len(candidates) == 0 {
|
|
return nil, 0
|
|
}
|
|
|
|
return candidates, significantSurfaceCount
|
|
|
|
}
|
|
|
|
// WebPageResolver fetches the main image from the HTML page
|
|
func WebPageResolver(article *Article) string {
|
|
candidates, significantSurfaceCount := WebPageImageResolver(article.Doc)
|
|
if candidates == nil {
|
|
return ""
|
|
}
|
|
var bestCandidate candidate
|
|
var topImage string
|
|
if significantSurfaceCount > 0 {
|
|
bestCandidate = findBestCandidateFromSurface(candidates)
|
|
} else {
|
|
bestCandidate = findBestCandidateFromScore(candidates)
|
|
}
|
|
|
|
topImage = bestCandidate.url
|
|
a, err := url.Parse(topImage)
|
|
if err != nil {
|
|
return topImage
|
|
}
|
|
finalURL, err := url.Parse(article.FinalURL)
|
|
if err != nil {
|
|
return topImage
|
|
}
|
|
b := finalURL.ResolveReference(a)
|
|
topImage = b.String()
|
|
|
|
return topImage
|
|
}
|
|
|
|
func findBestCandidateFromSurface(candidates []candidate) candidate {
|
|
max := 0
|
|
var bestCandidate candidate
|
|
for _, candidate := range candidates {
|
|
surface := candidate.surface
|
|
if surface >= max {
|
|
max = surface
|
|
bestCandidate = candidate
|
|
}
|
|
}
|
|
|
|
return bestCandidate
|
|
}
|
|
|
|
func findBestCandidateFromScore(candidates []candidate) candidate {
|
|
max := 0
|
|
var bestCandidate candidate
|
|
for _, candidate := range candidates {
|
|
score := candidate.score
|
|
if score >= max {
|
|
max = score
|
|
bestCandidate = candidate
|
|
}
|
|
}
|
|
|
|
return bestCandidate
|
|
}
|
|
|
|
// ogTag describes one way a page can declare its preview image in a <meta>
// or <link> element.
type ogTag struct {
	// tpe labels the originating convention ("facebook" or "twitter").
	// attribute is the HTML attribute that must equal name for the element
	// to match; value is the attribute holding the image URL.
	tpe, attribute, name, value string
}

// ogTags lists the element shapes OpenGraphResolver recognises, in the order
// they are tried against each element.
var ogTags = [...]ogTag{
	{tpe: "facebook", attribute: "property", name: "og:image", value: "content"},
	{tpe: "facebook", attribute: "rel", name: "image_src", value: "href"},
	{tpe: "twitter", attribute: "name", name: "twitter:image", value: "value"},
	{tpe: "twitter", attribute: "name", name: "twitter:image", value: "content"},
}
|
|
|
|
// ogImage is a preview image declared by a page's <meta> or <link> tags.
type ogImage struct {
	// url is the declared image location; tpe is the ogTag.tpe of the
	// declaration that produced it.
	url, tpe string
	// score ranks this image against the page's other declared images.
	score int
}
|
|
|
|
// OpenGraphResolver return OpenGraph properties
|
|
func OpenGraphResolver(doc *goquery.Document) string {
|
|
meta := doc.Find("meta")
|
|
links := doc.Find("link")
|
|
var topImage string
|
|
meta = meta.Union(links)
|
|
var ogImages []ogImage
|
|
meta.Each(func(i int, tag *goquery.Selection) {
|
|
for _, ogTag := range ogTags {
|
|
attr, exist := tag.Attr(ogTag.attribute)
|
|
value, vexist := tag.Attr(ogTag.value)
|
|
if exist && attr == ogTag.name && vexist {
|
|
ogImage := ogImage{
|
|
url: value,
|
|
tpe: ogTag.tpe,
|
|
score: 0,
|
|
}
|
|
|
|
ogImages = append(ogImages, ogImage)
|
|
}
|
|
}
|
|
})
|
|
if len(ogImages) == 0 {
|
|
return ""
|
|
}
|
|
if len(ogImages) == 1 {
|
|
topImage = ogImages[0].url
|
|
goto IMAGE_FINALIZE
|
|
}
|
|
for _, ogImage := range ogImages {
|
|
if largebig.MatchString(ogImage.url) {
|
|
ogImage.score++
|
|
}
|
|
if ogImage.tpe == "twitter" {
|
|
ogImage.score++
|
|
}
|
|
}
|
|
topImage = findBestImageFromScore(ogImages).url
|
|
IMAGE_FINALIZE:
|
|
if !strings.HasPrefix(topImage, "http") {
|
|
topImage = "http://" + topImage
|
|
}
|
|
|
|
return topImage
|
|
}
|
|
|
|
// assume that len(ogImages)>=2
|
|
func findBestImageFromScore(ogImages []ogImage) ogImage {
|
|
max := 0
|
|
bestOGImage := ogImages[0]
|
|
for _, ogImage := range ogImages[1:] {
|
|
score := ogImage.score
|
|
if score > max {
|
|
max = score
|
|
bestOGImage = ogImage
|
|
}
|
|
}
|
|
|
|
return bestOGImage
|
|
}
|