Full Mattermost server source with integrated Community Enterprise features.
Includes vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online): go build ./cmd/mattermost
Build (offline/air-gapped): go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
904 lines
33 KiB
Go
904 lines
33 KiB
Go
// Copyright (c) 2024 Mattermost Community Enterprise
|
|
// Open source implementation of Mattermost Enterprise Metrics using Prometheus
|
|
|
|
package metrics
|
|
|
|
import (
|
|
"database/sql"
|
|
"net/http"
|
|
"sync"
|
|
|
|
"github.com/mattermost/mattermost/server/public/shared/mlog"
|
|
"github.com/mattermost/mattermost/server/v8/einterfaces"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
)
|
|
|
|
// Naming components passed as Namespace and Subsystem to every metric
// created in this file.
const (
	// MetricsNamespace is the Namespace option set on each metric.
	MetricsNamespace = "mattermost"

	// MetricsSubsystem is the Subsystem option set on each metric.
	MetricsSubsystem = "server"
)
|
|
|
|
type MetricsImpl struct {
|
|
registry *prometheus.Registry
|
|
logger mlog.LoggerIFace
|
|
|
|
// DB collectors tracking
|
|
dbCollectors map[string]prometheus.Collector
|
|
dbCollectorMutex sync.Mutex
|
|
|
|
// Counters
|
|
postCreate prometheus.Counter
|
|
webhookPost prometheus.Counter
|
|
postSentEmail prometheus.Counter
|
|
postSentPush prometheus.Counter
|
|
postBroadcast prometheus.Counter
|
|
postFileAttachment prometheus.Counter
|
|
|
|
httpRequest prometheus.Counter
|
|
httpError prometheus.Counter
|
|
|
|
clusterRequest prometheus.Counter
|
|
clusterRequestTime prometheus.Histogram
|
|
clusterEventCounter *prometheus.CounterVec
|
|
|
|
login prometheus.Counter
|
|
loginFail prometheus.Counter
|
|
|
|
etagHit *prometheus.CounterVec
|
|
etagMiss *prometheus.CounterVec
|
|
|
|
memCacheHit *prometheus.CounterVec
|
|
memCacheMiss *prometheus.CounterVec
|
|
memCacheInvalidation *prometheus.CounterVec
|
|
|
|
sessionCacheHit prometheus.Counter
|
|
sessionCacheMiss prometheus.Counter
|
|
sessionCacheInvalidation prometheus.Counter
|
|
|
|
websocketEvent *prometheus.CounterVec
|
|
websocketBroadcast *prometheus.CounterVec
|
|
websocketBroadcastBuffer *prometheus.GaugeVec
|
|
websocketBroadcastUsers *prometheus.GaugeVec
|
|
websocketReconnect *prometheus.CounterVec
|
|
httpWebsockets *prometheus.GaugeVec
|
|
|
|
postsSearch prometheus.Counter
|
|
postsSearchTime prometheus.Histogram
|
|
filesSearch prometheus.Counter
|
|
filesSearchTime prometheus.Histogram
|
|
storeMethodTime *prometheus.HistogramVec
|
|
apiEndpointTime *prometheus.HistogramVec
|
|
redisEndpointTime *prometheus.HistogramVec
|
|
|
|
postIndex prometheus.Counter
|
|
fileIndex prometheus.Counter
|
|
userIndex prometheus.Counter
|
|
channelIndex prometheus.Counter
|
|
|
|
pluginHookTime *prometheus.HistogramVec
|
|
pluginMultiHookIterTime *prometheus.HistogramVec
|
|
pluginMultiHookTime prometheus.Histogram
|
|
pluginAPITime *prometheus.HistogramVec
|
|
|
|
enabledUsers prometheus.Gauge
|
|
|
|
remoteClusterMsgSent *prometheus.CounterVec
|
|
remoteClusterMsgReceived *prometheus.CounterVec
|
|
remoteClusterMsgErrors *prometheus.CounterVec
|
|
remoteClusterPingTime *prometheus.HistogramVec
|
|
remoteClusterClockSkew *prometheus.GaugeVec
|
|
remoteClusterConnState *prometheus.CounterVec
|
|
|
|
sharedChannelsSync *prometheus.CounterVec
|
|
sharedChannelsTaskQueueTime prometheus.Histogram
|
|
sharedChannelsQueueSize prometheus.Gauge
|
|
sharedChannelsSyncCollectionTime *prometheus.HistogramVec
|
|
sharedChannelsSyncSendTime *prometheus.HistogramVec
|
|
sharedChannelsSyncCollectionStep *prometheus.HistogramVec
|
|
sharedChannelsSyncSendStep *prometheus.HistogramVec
|
|
|
|
jobActive *prometheus.GaugeVec
|
|
|
|
replicaLagAbsolute *prometheus.GaugeVec
|
|
replicaLagTime *prometheus.GaugeVec
|
|
|
|
notificationCounter *prometheus.CounterVec
|
|
notificationAck *prometheus.CounterVec
|
|
notificationSuccess *prometheus.CounterVec
|
|
notificationError *prometheus.CounterVec
|
|
notificationNotSent *prometheus.CounterVec
|
|
notificationUnsupported *prometheus.CounterVec
|
|
|
|
// Client metrics
|
|
clientTimeToFirstByte *prometheus.HistogramVec
|
|
clientTimeToLastByte *prometheus.HistogramVec
|
|
clientTimeToDomInteractive *prometheus.HistogramVec
|
|
clientSplashScreenEnd *prometheus.HistogramVec
|
|
clientFirstContentfulPaint *prometheus.HistogramVec
|
|
clientLargestContentfulPaint *prometheus.HistogramVec
|
|
clientInteractionToNextPaint *prometheus.HistogramVec
|
|
clientCumulativeLayoutShift *prometheus.HistogramVec
|
|
clientLongTasks *prometheus.CounterVec
|
|
clientPageLoadDuration *prometheus.HistogramVec
|
|
clientChannelSwitchDuration *prometheus.HistogramVec
|
|
clientTeamSwitchDuration *prometheus.HistogramVec
|
|
clientRHSLoadDuration *prometheus.HistogramVec
|
|
globalThreadsLoadDuration *prometheus.HistogramVec
|
|
|
|
// Mobile client metrics
|
|
mobileClientLoadDuration *prometheus.HistogramVec
|
|
mobileClientChannelSwitchDuration *prometheus.HistogramVec
|
|
mobileClientTeamSwitchDuration *prometheus.HistogramVec
|
|
mobileClientNetworkMetrics *prometheus.HistogramVec
|
|
mobileClientSessionMetadata *prometheus.GaugeVec
|
|
|
|
// Desktop metrics
|
|
desktopCpuUsage *prometheus.GaugeVec
|
|
desktopMemoryUsage *prometheus.GaugeVec
|
|
|
|
// Access control metrics
|
|
accessControlSearchQuery prometheus.Histogram
|
|
accessControlExpressionCompile prometheus.Histogram
|
|
accessControlEvaluate prometheus.Histogram
|
|
accessControlCacheInvalidation prometheus.Counter
|
|
}
|
|
|
|
func NewMetricsInterface(logger mlog.LoggerIFace) einterfaces.MetricsInterface {
|
|
m := &MetricsImpl{
|
|
registry: prometheus.NewRegistry(),
|
|
logger: logger,
|
|
dbCollectors: make(map[string]prometheus.Collector),
|
|
}
|
|
|
|
m.initMetrics()
|
|
return m
|
|
}
|
|
|
|
func (m *MetricsImpl) initMetrics() {
|
|
// Post metrics
|
|
m.postCreate = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_total",
|
|
Help: "Total number of posts created",
|
|
})
|
|
|
|
m.webhookPost = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "webhook_post_total",
|
|
Help: "Total number of webhook posts",
|
|
})
|
|
|
|
m.postSentEmail = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_sent_email_total",
|
|
Help: "Total number of posts sent via email",
|
|
})
|
|
|
|
m.postSentPush = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_sent_push_total",
|
|
Help: "Total number of posts sent via push notification",
|
|
})
|
|
|
|
m.postBroadcast = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_broadcast_total",
|
|
Help: "Total number of posts broadcast",
|
|
})
|
|
|
|
m.postFileAttachment = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_file_attachment_total",
|
|
Help: "Total number of file attachments",
|
|
})
|
|
|
|
// HTTP metrics
|
|
m.httpRequest = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "http_request_total",
|
|
Help: "Total number of HTTP requests",
|
|
})
|
|
|
|
m.httpError = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "http_error_total",
|
|
Help: "Total number of HTTP errors",
|
|
})
|
|
|
|
// Cluster metrics
|
|
m.clusterRequest = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cluster_request_total",
|
|
Help: "Total number of cluster requests",
|
|
})
|
|
|
|
m.clusterRequestTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cluster_request_duration_seconds",
|
|
Help: "Cluster request duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.clusterEventCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cluster_event_total",
|
|
Help: "Total number of cluster events by type",
|
|
}, []string{"type"})
|
|
|
|
// Login metrics
|
|
m.login = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "login_total",
|
|
Help: "Total number of successful logins",
|
|
})
|
|
|
|
m.loginFail = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "login_fail_total",
|
|
Help: "Total number of failed logins",
|
|
})
|
|
|
|
// Cache metrics
|
|
m.etagHit = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "etag_hit_total",
|
|
Help: "Total number of ETag hits",
|
|
}, []string{"route"})
|
|
|
|
m.etagMiss = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "etag_miss_total",
|
|
Help: "Total number of ETag misses",
|
|
}, []string{"route"})
|
|
|
|
m.memCacheHit = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cache_hit_total",
|
|
Help: "Total number of cache hits",
|
|
}, []string{"name"})
|
|
|
|
m.memCacheMiss = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cache_miss_total",
|
|
Help: "Total number of cache misses",
|
|
}, []string{"name"})
|
|
|
|
m.memCacheInvalidation = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "cache_invalidation_total",
|
|
Help: "Total number of cache invalidations",
|
|
}, []string{"name"})
|
|
|
|
m.sessionCacheHit = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "session_cache_hit_total",
|
|
Help: "Total number of session cache hits",
|
|
})
|
|
|
|
m.sessionCacheMiss = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "session_cache_miss_total",
|
|
Help: "Total number of session cache misses",
|
|
})
|
|
|
|
m.sessionCacheInvalidation = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "session_cache_invalidation_total",
|
|
Help: "Total number of session cache invalidations",
|
|
})
|
|
|
|
// WebSocket metrics
|
|
m.websocketEvent = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "websocket_event_total",
|
|
Help: "Total number of websocket events",
|
|
}, []string{"type"})
|
|
|
|
m.websocketBroadcast = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "websocket_broadcast_total",
|
|
Help: "Total number of websocket broadcasts",
|
|
}, []string{"type"})
|
|
|
|
m.websocketBroadcastBuffer = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "websocket_broadcast_buffer_size",
|
|
Help: "Current websocket broadcast buffer size",
|
|
}, []string{"hub"})
|
|
|
|
m.websocketBroadcastUsers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "websocket_broadcast_users_registered",
|
|
Help: "Number of users registered for websocket broadcasts",
|
|
}, []string{"hub"})
|
|
|
|
m.websocketReconnect = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "websocket_reconnect_total",
|
|
Help: "Total number of websocket reconnects",
|
|
}, []string{"type", "error_code"})
|
|
|
|
m.httpWebsockets = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "http_websockets_total",
|
|
Help: "Total number of active HTTP websocket connections",
|
|
}, []string{"origin_client"})
|
|
|
|
// Search metrics
|
|
m.postsSearch = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "posts_search_total",
|
|
Help: "Total number of post searches",
|
|
})
|
|
|
|
m.postsSearchTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "posts_search_duration_seconds",
|
|
Help: "Post search duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.filesSearch = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "files_search_total",
|
|
Help: "Total number of file searches",
|
|
})
|
|
|
|
m.filesSearchTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "files_search_duration_seconds",
|
|
Help: "File search duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.storeMethodTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "store_method_duration_seconds",
|
|
Help: "Store method duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"method", "success"})
|
|
|
|
m.apiEndpointTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "api_endpoint_duration_seconds",
|
|
Help: "API endpoint duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"endpoint", "method", "status_code", "origin_client", "page_load_context"})
|
|
|
|
m.redisEndpointTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "redis_endpoint_duration_seconds",
|
|
Help: "Redis endpoint duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"cache_name", "operation"})
|
|
|
|
// Index metrics
|
|
m.postIndex = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "post_index_total",
|
|
Help: "Total number of posts indexed",
|
|
})
|
|
|
|
m.fileIndex = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "file_index_total",
|
|
Help: "Total number of files indexed",
|
|
})
|
|
|
|
m.userIndex = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "user_index_total",
|
|
Help: "Total number of users indexed",
|
|
})
|
|
|
|
m.channelIndex = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "channel_index_total",
|
|
Help: "Total number of channels indexed",
|
|
})
|
|
|
|
// Plugin metrics
|
|
m.pluginHookTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "plugin_hook_duration_seconds",
|
|
Help: "Plugin hook duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"plugin_id", "hook_name", "success"})
|
|
|
|
m.pluginMultiHookIterTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "plugin_multi_hook_iteration_duration_seconds",
|
|
Help: "Plugin multi-hook iteration duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"plugin_id"})
|
|
|
|
m.pluginMultiHookTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "plugin_multi_hook_duration_seconds",
|
|
Help: "Plugin multi-hook duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.pluginAPITime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "plugin_api_duration_seconds",
|
|
Help: "Plugin API duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"plugin_id", "api_name", "success"})
|
|
|
|
// Enabled users
|
|
m.enabledUsers = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "enabled_users",
|
|
Help: "Number of enabled users",
|
|
})
|
|
|
|
// Remote cluster metrics
|
|
m.remoteClusterMsgSent = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_msg_sent_total",
|
|
Help: "Total messages sent to remote cluster",
|
|
}, []string{"remote_id"})
|
|
|
|
m.remoteClusterMsgReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_msg_received_total",
|
|
Help: "Total messages received from remote cluster",
|
|
}, []string{"remote_id"})
|
|
|
|
m.remoteClusterMsgErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_msg_errors_total",
|
|
Help: "Total remote cluster message errors",
|
|
}, []string{"remote_id", "timeout"})
|
|
|
|
m.remoteClusterPingTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_ping_duration_seconds",
|
|
Help: "Remote cluster ping duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"remote_id"})
|
|
|
|
m.remoteClusterClockSkew = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_clock_skew_seconds",
|
|
Help: "Remote cluster clock skew in seconds",
|
|
}, []string{"remote_id"})
|
|
|
|
m.remoteClusterConnState = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "remote_cluster_conn_state_change_total",
|
|
Help: "Total remote cluster connection state changes",
|
|
}, []string{"remote_id", "online"})
|
|
|
|
// Shared channels metrics
|
|
m.sharedChannelsSync = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_sync_total",
|
|
Help: "Total shared channel syncs",
|
|
}, []string{"remote_id"})
|
|
|
|
m.sharedChannelsTaskQueueTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_task_queue_duration_seconds",
|
|
Help: "Shared channels task queue duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.sharedChannelsQueueSize = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_queue_size",
|
|
Help: "Shared channels queue size",
|
|
})
|
|
|
|
m.sharedChannelsSyncCollectionTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_sync_collection_duration_seconds",
|
|
Help: "Shared channels sync collection duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"remote_id"})
|
|
|
|
m.sharedChannelsSyncSendTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_sync_send_duration_seconds",
|
|
Help: "Shared channels sync send duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"remote_id"})
|
|
|
|
m.sharedChannelsSyncCollectionStep = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_sync_collection_step_duration_seconds",
|
|
Help: "Shared channels sync collection step duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"remote_id", "step"})
|
|
|
|
m.sharedChannelsSyncSendStep = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "shared_channels_sync_send_step_duration_seconds",
|
|
Help: "Shared channels sync send step duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"remote_id", "step"})
|
|
|
|
// Job metrics
|
|
m.jobActive = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "job_active",
|
|
Help: "Number of active jobs by type",
|
|
}, []string{"type"})
|
|
|
|
// Replica lag metrics
|
|
m.replicaLagAbsolute = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "replica_lag_absolute",
|
|
Help: "Replica lag absolute value",
|
|
}, []string{"node"})
|
|
|
|
m.replicaLagTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "replica_lag_time_seconds",
|
|
Help: "Replica lag time in seconds",
|
|
}, []string{"node"})
|
|
|
|
// Notification metrics
|
|
m.notificationCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_total",
|
|
Help: "Total notifications",
|
|
}, []string{"type", "platform"})
|
|
|
|
m.notificationAck = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_ack_total",
|
|
Help: "Total notification acknowledgements",
|
|
}, []string{"type", "platform"})
|
|
|
|
m.notificationSuccess = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_success_total",
|
|
Help: "Total successful notifications",
|
|
}, []string{"type", "platform"})
|
|
|
|
m.notificationError = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_error_total",
|
|
Help: "Total notification errors",
|
|
}, []string{"type", "reason", "platform"})
|
|
|
|
m.notificationNotSent = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_not_sent_total",
|
|
Help: "Total notifications not sent",
|
|
}, []string{"type", "reason", "platform"})
|
|
|
|
m.notificationUnsupported = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "notification_unsupported_total",
|
|
Help: "Total unsupported notifications",
|
|
}, []string{"type", "reason", "platform"})
|
|
|
|
// Client metrics
|
|
m.clientTimeToFirstByte = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_time_to_first_byte_seconds",
|
|
Help: "Client time to first byte in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientTimeToLastByte = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_time_to_last_byte_seconds",
|
|
Help: "Client time to last byte in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientTimeToDomInteractive = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_time_to_dom_interactive_seconds",
|
|
Help: "Client time to DOM interactive in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientSplashScreenEnd = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_splash_screen_end_seconds",
|
|
Help: "Client splash screen end in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "page_type", "user_id"})
|
|
|
|
m.clientFirstContentfulPaint = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_first_contentful_paint_seconds",
|
|
Help: "Client first contentful paint in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientLargestContentfulPaint = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_largest_contentful_paint_seconds",
|
|
Help: "Client largest contentful paint in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "region", "user_id"})
|
|
|
|
m.clientInteractionToNextPaint = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_interaction_to_next_paint_seconds",
|
|
Help: "Client interaction to next paint in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "interaction", "user_id"})
|
|
|
|
m.clientCumulativeLayoutShift = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_cumulative_layout_shift",
|
|
Help: "Client cumulative layout shift",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientLongTasks = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_long_tasks_total",
|
|
Help: "Total client long tasks",
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientPageLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_page_load_duration_seconds",
|
|
Help: "Client page load duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.clientChannelSwitchDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_channel_switch_duration_seconds",
|
|
Help: "Client channel switch duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "fresh", "user_id"})
|
|
|
|
m.clientTeamSwitchDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_team_switch_duration_seconds",
|
|
Help: "Client team switch duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "fresh", "user_id"})
|
|
|
|
m.clientRHSLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "client_rhs_load_duration_seconds",
|
|
Help: "Client RHS load duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
m.globalThreadsLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "global_threads_load_duration_seconds",
|
|
Help: "Global threads load duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "user_id"})
|
|
|
|
// Mobile client metrics
|
|
m.mobileClientLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "mobile_client_load_duration_seconds",
|
|
Help: "Mobile client load duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform"})
|
|
|
|
m.mobileClientChannelSwitchDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "mobile_client_channel_switch_duration_seconds",
|
|
Help: "Mobile client channel switch duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform"})
|
|
|
|
m.mobileClientTeamSwitchDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "mobile_client_team_switch_duration_seconds",
|
|
Help: "Mobile client team switch duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform"})
|
|
|
|
m.mobileClientNetworkMetrics = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "mobile_client_network_metrics",
|
|
Help: "Mobile client network metrics",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"platform", "agent", "group", "metric_type"})
|
|
|
|
m.mobileClientSessionMetadata = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "mobile_client_session_metadata",
|
|
Help: "Mobile client session metadata",
|
|
}, []string{"version", "platform", "notification_disabled"})
|
|
|
|
// Desktop metrics
|
|
m.desktopCpuUsage = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "desktop_cpu_usage",
|
|
Help: "Desktop CPU usage",
|
|
}, []string{"platform", "version", "process"})
|
|
|
|
m.desktopMemoryUsage = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "desktop_memory_usage",
|
|
Help: "Desktop memory usage",
|
|
}, []string{"platform", "version", "process"})
|
|
|
|
// Access control metrics
|
|
m.accessControlSearchQuery = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "access_control_search_query_duration_seconds",
|
|
Help: "Access control search query duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.accessControlExpressionCompile = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "access_control_expression_compile_duration_seconds",
|
|
Help: "Access control expression compile duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.accessControlEvaluate = prometheus.NewHistogram(prometheus.HistogramOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "access_control_evaluate_duration_seconds",
|
|
Help: "Access control evaluate duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
})
|
|
|
|
m.accessControlCacheInvalidation = prometheus.NewCounter(prometheus.CounterOpts{
|
|
Namespace: MetricsNamespace,
|
|
Subsystem: MetricsSubsystem,
|
|
Name: "access_control_cache_invalidation_total",
|
|
Help: "Total access control cache invalidations",
|
|
})
|
|
}
|
|
|
|
// Register registers all metrics with Prometheus
|
|
func (m *MetricsImpl) Register() {
|
|
// Register default Go collectors
|
|
m.registry.MustRegister(collectors.NewGoCollector())
|
|
m.registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
|
|
|
|
// Register all our metrics
|
|
m.registry.MustRegister(
|
|
m.postCreate, m.webhookPost, m.postSentEmail, m.postSentPush, m.postBroadcast, m.postFileAttachment,
|
|
m.httpRequest, m.httpError,
|
|
m.clusterRequest, m.clusterRequestTime, m.clusterEventCounter,
|
|
m.login, m.loginFail,
|
|
m.etagHit, m.etagMiss,
|
|
m.memCacheHit, m.memCacheMiss, m.memCacheInvalidation,
|
|
m.sessionCacheHit, m.sessionCacheMiss, m.sessionCacheInvalidation,
|
|
m.websocketEvent, m.websocketBroadcast, m.websocketBroadcastBuffer, m.websocketBroadcastUsers, m.websocketReconnect, m.httpWebsockets,
|
|
m.postsSearch, m.postsSearchTime, m.filesSearch, m.filesSearchTime,
|
|
m.storeMethodTime, m.apiEndpointTime, m.redisEndpointTime,
|
|
m.postIndex, m.fileIndex, m.userIndex, m.channelIndex,
|
|
m.pluginHookTime, m.pluginMultiHookIterTime, m.pluginMultiHookTime, m.pluginAPITime,
|
|
m.enabledUsers,
|
|
m.remoteClusterMsgSent, m.remoteClusterMsgReceived, m.remoteClusterMsgErrors, m.remoteClusterPingTime, m.remoteClusterClockSkew, m.remoteClusterConnState,
|
|
m.sharedChannelsSync, m.sharedChannelsTaskQueueTime, m.sharedChannelsQueueSize, m.sharedChannelsSyncCollectionTime, m.sharedChannelsSyncSendTime, m.sharedChannelsSyncCollectionStep, m.sharedChannelsSyncSendStep,
|
|
m.jobActive,
|
|
m.replicaLagAbsolute, m.replicaLagTime,
|
|
m.notificationCounter, m.notificationAck, m.notificationSuccess, m.notificationError, m.notificationNotSent, m.notificationUnsupported,
|
|
m.clientTimeToFirstByte, m.clientTimeToLastByte, m.clientTimeToDomInteractive, m.clientSplashScreenEnd, m.clientFirstContentfulPaint, m.clientLargestContentfulPaint, m.clientInteractionToNextPaint, m.clientCumulativeLayoutShift, m.clientLongTasks, m.clientPageLoadDuration, m.clientChannelSwitchDuration, m.clientTeamSwitchDuration, m.clientRHSLoadDuration, m.globalThreadsLoadDuration,
|
|
m.mobileClientLoadDuration, m.mobileClientChannelSwitchDuration, m.mobileClientTeamSwitchDuration, m.mobileClientNetworkMetrics, m.mobileClientSessionMetadata,
|
|
m.desktopCpuUsage, m.desktopMemoryUsage,
|
|
m.accessControlSearchQuery, m.accessControlExpressionCompile, m.accessControlEvaluate, m.accessControlCacheInvalidation,
|
|
)
|
|
|
|
m.logger.Info("Metrics registered successfully")
|
|
}
|
|
|
|
// Handler returns the HTTP handler for metrics
|
|
func (m *MetricsImpl) Handler() http.Handler {
|
|
return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})
|
|
}
|
|
|
|
func (m *MetricsImpl) RegisterDBCollector(db *sql.DB, name string) {
|
|
m.dbCollectorMutex.Lock()
|
|
defer m.dbCollectorMutex.Unlock()
|
|
|
|
collector := collectors.NewDBStatsCollector(db, name)
|
|
m.dbCollectors[name] = collector
|
|
m.registry.MustRegister(collector)
|
|
}
|
|
|
|
func (m *MetricsImpl) UnregisterDBCollector(db *sql.DB, name string) {
|
|
m.dbCollectorMutex.Lock()
|
|
defer m.dbCollectorMutex.Unlock()
|
|
|
|
if collector, ok := m.dbCollectors[name]; ok {
|
|
m.registry.Unregister(collector)
|
|
delete(m.dbCollectors, name)
|
|
}
|
|
}
|