mattermost-community-enterp.../enterprise-impl/metrics/metrics.go
Claude ec1f89217a Merge: Complete Mattermost Server with Community Enterprise
Full Mattermost server source with integrated Community Enterprise features.
Includes vendor directory for offline/air-gapped builds.

Structure:
- enterprise-impl/: Enterprise feature implementations
- enterprise-community/: Init files that register implementations
- enterprise/: Bridge imports (community_imports.go)
- vendor/: All dependencies for offline builds

Build (online):
  go build ./cmd/mattermost

Build (offline/air-gapped):
  go build -mod=vendor ./cmd/mattermost

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 23:59:07 +09:00

904 lines
33 KiB
Go

// Copyright (c) 2024 Mattermost Community Enterprise
// Open source implementation of Mattermost Enterprise Metrics using Prometheus
package metrics
import (
"database/sql"
"net/http"
"sync"
"github.com/mattermost/mattermost/server/public/shared/mlog"
"github.com/mattermost/mattermost/server/v8/einterfaces"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// MetricsNamespace is the Prometheus namespace applied to every metric
// created by this package.
const MetricsNamespace = "mattermost"

// MetricsSubsystem is the Prometheus subsystem applied to every metric
// created by this package.
const MetricsSubsystem = "server"
// MetricsImpl holds a private Prometheus registry together with every
// collector this package exposes. The collectors are built by initMetrics
// and attached to the registry by Register.
type MetricsImpl struct {
	registry *prometheus.Registry
	logger   mlog.LoggerIFace

	// Database stats collectors keyed by the name they were registered under.
	// dbCollectorMutex is held while the map is read or modified.
	dbCollectors     map[string]prometheus.Collector
	dbCollectorMutex sync.Mutex

	// Posts.
	postCreate, webhookPost, postSentEmail, postSentPush, postBroadcast, postFileAttachment prometheus.Counter

	// HTTP.
	httpRequest, httpError prometheus.Counter

	// Cluster.
	clusterRequest      prometheus.Counter
	clusterRequestTime  prometheus.Histogram
	clusterEventCounter *prometheus.CounterVec

	// Logins.
	login, loginFail prometheus.Counter

	// ETag, in-memory cache and session cache.
	etagHit, etagMiss                                           *prometheus.CounterVec
	memCacheHit, memCacheMiss, memCacheInvalidation             *prometheus.CounterVec
	sessionCacheHit, sessionCacheMiss, sessionCacheInvalidation prometheus.Counter

	// WebSockets.
	websocketEvent, websocketBroadcast                *prometheus.CounterVec
	websocketBroadcastBuffer, websocketBroadcastUsers *prometheus.GaugeVec
	websocketReconnect                                *prometheus.CounterVec
	httpWebsockets                                    *prometheus.GaugeVec

	// Search and request timing.
	postsSearch                                         prometheus.Counter
	postsSearchTime                                     prometheus.Histogram
	filesSearch                                         prometheus.Counter
	filesSearchTime                                     prometheus.Histogram
	storeMethodTime, apiEndpointTime, redisEndpointTime *prometheus.HistogramVec

	// Indexing.
	postIndex, fileIndex, userIndex, channelIndex prometheus.Counter

	// Plugins.
	pluginHookTime, pluginMultiHookIterTime *prometheus.HistogramVec
	pluginMultiHookTime                     prometheus.Histogram
	pluginAPITime                           *prometheus.HistogramVec

	// Enabled users.
	enabledUsers prometheus.Gauge

	// Remote clusters.
	remoteClusterMsgSent, remoteClusterMsgReceived, remoteClusterMsgErrors *prometheus.CounterVec
	remoteClusterPingTime                                                  *prometheus.HistogramVec
	remoteClusterClockSkew                                                 *prometheus.GaugeVec
	remoteClusterConnState                                                 *prometheus.CounterVec

	// Shared channels.
	sharedChannelsSync          *prometheus.CounterVec
	sharedChannelsTaskQueueTime prometheus.Histogram
	sharedChannelsQueueSize     prometheus.Gauge

	sharedChannelsSyncCollectionTime, sharedChannelsSyncSendTime, sharedChannelsSyncCollectionStep, sharedChannelsSyncSendStep *prometheus.HistogramVec

	// Jobs and replica lag.
	jobActive                          *prometheus.GaugeVec
	replicaLagAbsolute, replicaLagTime *prometheus.GaugeVec

	// Notifications.
	notificationCounter, notificationAck, notificationSuccess, notificationError, notificationNotSent, notificationUnsupported *prometheus.CounterVec

	// Web client metrics.
	clientTimeToFirstByte, clientTimeToLastByte, clientTimeToDomInteractive, clientSplashScreenEnd, clientFirstContentfulPaint, clientLargestContentfulPaint, clientInteractionToNextPaint, clientCumulativeLayoutShift *prometheus.HistogramVec

	clientLongTasks *prometheus.CounterVec

	clientPageLoadDuration, clientChannelSwitchDuration, clientTeamSwitchDuration, clientRHSLoadDuration, globalThreadsLoadDuration *prometheus.HistogramVec

	// Mobile client metrics.
	mobileClientLoadDuration, mobileClientChannelSwitchDuration, mobileClientTeamSwitchDuration, mobileClientNetworkMetrics *prometheus.HistogramVec

	mobileClientSessionMetadata *prometheus.GaugeVec

	// Desktop metrics.
	desktopCpuUsage, desktopMemoryUsage *prometheus.GaugeVec

	// Access control metrics.
	accessControlSearchQuery, accessControlExpressionCompile, accessControlEvaluate prometheus.Histogram

	accessControlCacheInvalidation prometheus.Counter
}
// NewMetricsInterface builds a MetricsImpl backed by a fresh Prometheus
// registry and the supplied logger, and constructs all of its collectors.
// The collectors are only created here; Register attaches them to the
// registry.
func NewMetricsInterface(logger mlog.LoggerIFace) einterfaces.MetricsInterface {
	impl := new(MetricsImpl)
	impl.registry = prometheus.NewRegistry()
	impl.logger = logger
	impl.dbCollectors = map[string]prometheus.Collector{}

	impl.initMetrics()
	return impl
}
// initMetrics constructs every collector held by MetricsImpl. All metrics
// share MetricsNamespace and MetricsSubsystem, and every histogram uses
// prometheus.DefBuckets. Nothing is registered here.
func (m *MetricsImpl) initMetrics() {
	// Option builders: each fills in the namespace/subsystem shared by all metrics.
	counterOpts := func(name, help string) prometheus.CounterOpts {
		return prometheus.CounterOpts{
			Namespace: MetricsNamespace,
			Subsystem: MetricsSubsystem,
			Name:      name,
			Help:      help,
		}
	}
	gaugeOpts := func(name, help string) prometheus.GaugeOpts {
		return prometheus.GaugeOpts{
			Namespace: MetricsNamespace,
			Subsystem: MetricsSubsystem,
			Name:      name,
			Help:      help,
		}
	}
	histogramOpts := func(name, help string) prometheus.HistogramOpts {
		return prometheus.HistogramOpts{
			Namespace: MetricsNamespace,
			Subsystem: MetricsSubsystem,
			Name:      name,
			Help:      help,
			Buckets:   prometheus.DefBuckets,
		}
	}

	// Collector constructors, one per collector kind used below.
	counter := func(name, help string) prometheus.Counter {
		return prometheus.NewCounter(counterOpts(name, help))
	}
	counterVec := func(name, help string, labels ...string) *prometheus.CounterVec {
		return prometheus.NewCounterVec(counterOpts(name, help), labels)
	}
	gauge := func(name, help string) prometheus.Gauge {
		return prometheus.NewGauge(gaugeOpts(name, help))
	}
	gaugeVec := func(name, help string, labels ...string) *prometheus.GaugeVec {
		return prometheus.NewGaugeVec(gaugeOpts(name, help), labels)
	}
	histogram := func(name, help string) prometheus.Histogram {
		return prometheus.NewHistogram(histogramOpts(name, help))
	}
	histogramVec := func(name, help string, labels ...string) *prometheus.HistogramVec {
		return prometheus.NewHistogramVec(histogramOpts(name, help), labels)
	}

	// Posts.
	m.postCreate = counter("post_total", "Total number of posts created")
	m.webhookPost = counter("webhook_post_total", "Total number of webhook posts")
	m.postSentEmail = counter("post_sent_email_total", "Total number of posts sent via email")
	m.postSentPush = counter("post_sent_push_total", "Total number of posts sent via push notification")
	m.postBroadcast = counter("post_broadcast_total", "Total number of posts broadcast")
	m.postFileAttachment = counter("post_file_attachment_total", "Total number of file attachments")

	// HTTP.
	m.httpRequest = counter("http_request_total", "Total number of HTTP requests")
	m.httpError = counter("http_error_total", "Total number of HTTP errors")

	// Cluster.
	m.clusterRequest = counter("cluster_request_total", "Total number of cluster requests")
	m.clusterRequestTime = histogram("cluster_request_duration_seconds", "Cluster request duration in seconds")
	m.clusterEventCounter = counterVec("cluster_event_total", "Total number of cluster events by type", "type")

	// Logins.
	m.login = counter("login_total", "Total number of successful logins")
	m.loginFail = counter("login_fail_total", "Total number of failed logins")

	// Caches.
	m.etagHit = counterVec("etag_hit_total", "Total number of ETag hits", "route")
	m.etagMiss = counterVec("etag_miss_total", "Total number of ETag misses", "route")
	m.memCacheHit = counterVec("cache_hit_total", "Total number of cache hits", "name")
	m.memCacheMiss = counterVec("cache_miss_total", "Total number of cache misses", "name")
	m.memCacheInvalidation = counterVec("cache_invalidation_total", "Total number of cache invalidations", "name")
	m.sessionCacheHit = counter("session_cache_hit_total", "Total number of session cache hits")
	m.sessionCacheMiss = counter("session_cache_miss_total", "Total number of session cache misses")
	m.sessionCacheInvalidation = counter("session_cache_invalidation_total", "Total number of session cache invalidations")

	// WebSockets.
	m.websocketEvent = counterVec("websocket_event_total", "Total number of websocket events", "type")
	m.websocketBroadcast = counterVec("websocket_broadcast_total", "Total number of websocket broadcasts", "type")
	m.websocketBroadcastBuffer = gaugeVec("websocket_broadcast_buffer_size", "Current websocket broadcast buffer size", "hub")
	m.websocketBroadcastUsers = gaugeVec("websocket_broadcast_users_registered", "Number of users registered for websocket broadcasts", "hub")
	m.websocketReconnect = counterVec("websocket_reconnect_total", "Total number of websocket reconnects", "type", "error_code")
	m.httpWebsockets = gaugeVec("http_websockets_total", "Total number of active HTTP websocket connections", "origin_client")

	// Search and request timing.
	m.postsSearch = counter("posts_search_total", "Total number of post searches")
	m.postsSearchTime = histogram("posts_search_duration_seconds", "Post search duration in seconds")
	m.filesSearch = counter("files_search_total", "Total number of file searches")
	m.filesSearchTime = histogram("files_search_duration_seconds", "File search duration in seconds")
	m.storeMethodTime = histogramVec("store_method_duration_seconds", "Store method duration in seconds", "method", "success")
	m.apiEndpointTime = histogramVec("api_endpoint_duration_seconds", "API endpoint duration in seconds",
		"endpoint", "method", "status_code", "origin_client", "page_load_context")
	m.redisEndpointTime = histogramVec("redis_endpoint_duration_seconds", "Redis endpoint duration in seconds", "cache_name", "operation")

	// Indexing.
	m.postIndex = counter("post_index_total", "Total number of posts indexed")
	m.fileIndex = counter("file_index_total", "Total number of files indexed")
	m.userIndex = counter("user_index_total", "Total number of users indexed")
	m.channelIndex = counter("channel_index_total", "Total number of channels indexed")

	// Plugins.
	m.pluginHookTime = histogramVec("plugin_hook_duration_seconds", "Plugin hook duration in seconds", "plugin_id", "hook_name", "success")
	m.pluginMultiHookIterTime = histogramVec("plugin_multi_hook_iteration_duration_seconds", "Plugin multi-hook iteration duration in seconds", "plugin_id")
	m.pluginMultiHookTime = histogram("plugin_multi_hook_duration_seconds", "Plugin multi-hook duration in seconds")
	m.pluginAPITime = histogramVec("plugin_api_duration_seconds", "Plugin API duration in seconds", "plugin_id", "api_name", "success")

	// Enabled users.
	m.enabledUsers = gauge("enabled_users", "Number of enabled users")

	// Remote clusters.
	m.remoteClusterMsgSent = counterVec("remote_cluster_msg_sent_total", "Total messages sent to remote cluster", "remote_id")
	m.remoteClusterMsgReceived = counterVec("remote_cluster_msg_received_total", "Total messages received from remote cluster", "remote_id")
	m.remoteClusterMsgErrors = counterVec("remote_cluster_msg_errors_total", "Total remote cluster message errors", "remote_id", "timeout")
	m.remoteClusterPingTime = histogramVec("remote_cluster_ping_duration_seconds", "Remote cluster ping duration in seconds", "remote_id")
	m.remoteClusterClockSkew = gaugeVec("remote_cluster_clock_skew_seconds", "Remote cluster clock skew in seconds", "remote_id")
	m.remoteClusterConnState = counterVec("remote_cluster_conn_state_change_total", "Total remote cluster connection state changes", "remote_id", "online")

	// Shared channels.
	m.sharedChannelsSync = counterVec("shared_channels_sync_total", "Total shared channel syncs", "remote_id")
	m.sharedChannelsTaskQueueTime = histogram("shared_channels_task_queue_duration_seconds", "Shared channels task queue duration in seconds")
	m.sharedChannelsQueueSize = gauge("shared_channels_queue_size", "Shared channels queue size")
	m.sharedChannelsSyncCollectionTime = histogramVec("shared_channels_sync_collection_duration_seconds", "Shared channels sync collection duration in seconds", "remote_id")
	m.sharedChannelsSyncSendTime = histogramVec("shared_channels_sync_send_duration_seconds", "Shared channels sync send duration in seconds", "remote_id")
	m.sharedChannelsSyncCollectionStep = histogramVec("shared_channels_sync_collection_step_duration_seconds", "Shared channels sync collection step duration in seconds", "remote_id", "step")
	m.sharedChannelsSyncSendStep = histogramVec("shared_channels_sync_send_step_duration_seconds", "Shared channels sync send step duration in seconds", "remote_id", "step")

	// Jobs.
	m.jobActive = gaugeVec("job_active", "Number of active jobs by type", "type")

	// Replica lag.
	m.replicaLagAbsolute = gaugeVec("replica_lag_absolute", "Replica lag absolute value", "node")
	m.replicaLagTime = gaugeVec("replica_lag_time_seconds", "Replica lag time in seconds", "node")

	// Notifications.
	m.notificationCounter = counterVec("notification_total", "Total notifications", "type", "platform")
	m.notificationAck = counterVec("notification_ack_total", "Total notification acknowledgements", "type", "platform")
	m.notificationSuccess = counterVec("notification_success_total", "Total successful notifications", "type", "platform")
	m.notificationError = counterVec("notification_error_total", "Total notification errors", "type", "reason", "platform")
	m.notificationNotSent = counterVec("notification_not_sent_total", "Total notifications not sent", "type", "reason", "platform")
	m.notificationUnsupported = counterVec("notification_unsupported_total", "Total unsupported notifications", "type", "reason", "platform")

	// Web client metrics.
	m.clientTimeToFirstByte = histogramVec("client_time_to_first_byte_seconds", "Client time to first byte in seconds", "platform", "agent", "user_id")
	m.clientTimeToLastByte = histogramVec("client_time_to_last_byte_seconds", "Client time to last byte in seconds", "platform", "agent", "user_id")
	m.clientTimeToDomInteractive = histogramVec("client_time_to_dom_interactive_seconds", "Client time to DOM interactive in seconds", "platform", "agent", "user_id")
	m.clientSplashScreenEnd = histogramVec("client_splash_screen_end_seconds", "Client splash screen end in seconds", "platform", "agent", "page_type", "user_id")
	m.clientFirstContentfulPaint = histogramVec("client_first_contentful_paint_seconds", "Client first contentful paint in seconds", "platform", "agent", "user_id")
	m.clientLargestContentfulPaint = histogramVec("client_largest_contentful_paint_seconds", "Client largest contentful paint in seconds", "platform", "agent", "region", "user_id")
	m.clientInteractionToNextPaint = histogramVec("client_interaction_to_next_paint_seconds", "Client interaction to next paint in seconds", "platform", "agent", "interaction", "user_id")
	m.clientCumulativeLayoutShift = histogramVec("client_cumulative_layout_shift", "Client cumulative layout shift", "platform", "agent", "user_id")
	m.clientLongTasks = counterVec("client_long_tasks_total", "Total client long tasks", "platform", "agent", "user_id")
	m.clientPageLoadDuration = histogramVec("client_page_load_duration_seconds", "Client page load duration in seconds", "platform", "agent", "user_id")
	m.clientChannelSwitchDuration = histogramVec("client_channel_switch_duration_seconds", "Client channel switch duration in seconds", "platform", "agent", "fresh", "user_id")
	m.clientTeamSwitchDuration = histogramVec("client_team_switch_duration_seconds", "Client team switch duration in seconds", "platform", "agent", "fresh", "user_id")
	m.clientRHSLoadDuration = histogramVec("client_rhs_load_duration_seconds", "Client RHS load duration in seconds", "platform", "agent", "user_id")
	m.globalThreadsLoadDuration = histogramVec("global_threads_load_duration_seconds", "Global threads load duration in seconds", "platform", "agent", "user_id")

	// Mobile client metrics.
	m.mobileClientLoadDuration = histogramVec("mobile_client_load_duration_seconds", "Mobile client load duration in seconds", "platform")
	m.mobileClientChannelSwitchDuration = histogramVec("mobile_client_channel_switch_duration_seconds", "Mobile client channel switch duration in seconds", "platform")
	m.mobileClientTeamSwitchDuration = histogramVec("mobile_client_team_switch_duration_seconds", "Mobile client team switch duration in seconds", "platform")
	m.mobileClientNetworkMetrics = histogramVec("mobile_client_network_metrics", "Mobile client network metrics", "platform", "agent", "group", "metric_type")
	m.mobileClientSessionMetadata = gaugeVec("mobile_client_session_metadata", "Mobile client session metadata", "version", "platform", "notification_disabled")

	// Desktop metrics.
	m.desktopCpuUsage = gaugeVec("desktop_cpu_usage", "Desktop CPU usage", "platform", "version", "process")
	m.desktopMemoryUsage = gaugeVec("desktop_memory_usage", "Desktop memory usage", "platform", "version", "process")

	// Access control metrics.
	m.accessControlSearchQuery = histogram("access_control_search_query_duration_seconds", "Access control search query duration in seconds")
	m.accessControlExpressionCompile = histogram("access_control_expression_compile_duration_seconds", "Access control expression compile duration in seconds")
	m.accessControlEvaluate = histogram("access_control_evaluate_duration_seconds", "Access control evaluate duration in seconds")
	m.accessControlCacheInvalidation = counter("access_control_cache_invalidation_total", "Total access control cache invalidations")
}
// Register attaches the Go runtime collector, the process collector and every
// collector built by initMetrics to the private registry, then logs success.
// Registration goes through MustRegister, so a failure (for example a
// duplicate registration) panics rather than returning an error.
func (m *MetricsImpl) Register() {
	reg := m.registry

	// Runtime and process level collectors supplied by the Prometheus client.
	reg.MustRegister(collectors.NewGoCollector())
	reg.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))

	// Server collectors, listed in the same order as the MetricsImpl fields.
	serverCollectors := []prometheus.Collector{
		// Posts.
		m.postCreate, m.webhookPost, m.postSentEmail, m.postSentPush, m.postBroadcast, m.postFileAttachment,
		// HTTP.
		m.httpRequest, m.httpError,
		// Cluster.
		m.clusterRequest, m.clusterRequestTime, m.clusterEventCounter,
		// Logins.
		m.login, m.loginFail,
		// Caches.
		m.etagHit, m.etagMiss,
		m.memCacheHit, m.memCacheMiss, m.memCacheInvalidation,
		m.sessionCacheHit, m.sessionCacheMiss, m.sessionCacheInvalidation,
		// WebSockets.
		m.websocketEvent, m.websocketBroadcast, m.websocketBroadcastBuffer, m.websocketBroadcastUsers,
		m.websocketReconnect, m.httpWebsockets,
		// Search and request timing.
		m.postsSearch, m.postsSearchTime, m.filesSearch, m.filesSearchTime,
		m.storeMethodTime, m.apiEndpointTime, m.redisEndpointTime,
		// Indexing.
		m.postIndex, m.fileIndex, m.userIndex, m.channelIndex,
		// Plugins.
		m.pluginHookTime, m.pluginMultiHookIterTime, m.pluginMultiHookTime, m.pluginAPITime,
		// Enabled users.
		m.enabledUsers,
		// Remote clusters.
		m.remoteClusterMsgSent, m.remoteClusterMsgReceived, m.remoteClusterMsgErrors,
		m.remoteClusterPingTime, m.remoteClusterClockSkew, m.remoteClusterConnState,
		// Shared channels.
		m.sharedChannelsSync, m.sharedChannelsTaskQueueTime, m.sharedChannelsQueueSize,
		m.sharedChannelsSyncCollectionTime, m.sharedChannelsSyncSendTime,
		m.sharedChannelsSyncCollectionStep, m.sharedChannelsSyncSendStep,
		// Jobs.
		m.jobActive,
		// Replica lag.
		m.replicaLagAbsolute, m.replicaLagTime,
		// Notifications.
		m.notificationCounter, m.notificationAck, m.notificationSuccess,
		m.notificationError, m.notificationNotSent, m.notificationUnsupported,
		// Web client metrics.
		m.clientTimeToFirstByte, m.clientTimeToLastByte, m.clientTimeToDomInteractive,
		m.clientSplashScreenEnd, m.clientFirstContentfulPaint, m.clientLargestContentfulPaint,
		m.clientInteractionToNextPaint, m.clientCumulativeLayoutShift, m.clientLongTasks,
		m.clientPageLoadDuration, m.clientChannelSwitchDuration, m.clientTeamSwitchDuration,
		m.clientRHSLoadDuration, m.globalThreadsLoadDuration,
		// Mobile client metrics.
		m.mobileClientLoadDuration, m.mobileClientChannelSwitchDuration, m.mobileClientTeamSwitchDuration,
		m.mobileClientNetworkMetrics, m.mobileClientSessionMetadata,
		// Desktop metrics.
		m.desktopCpuUsage, m.desktopMemoryUsage,
		// Access control metrics.
		m.accessControlSearchQuery, m.accessControlExpressionCompile,
		m.accessControlEvaluate, m.accessControlCacheInvalidation,
	}
	reg.MustRegister(serverCollectors...)

	m.logger.Info("Metrics registered successfully")
}
// Handler returns an http.Handler that serves the metrics gathered from this
// instance's private registry, using default promhttp handler options.
func (m *MetricsImpl) Handler() http.Handler {
	var opts promhttp.HandlerOpts
	return promhttp.HandlerFor(m.registry, opts)
}
// RegisterDBCollector exposes database/sql connection-pool statistics for db
// under the label name.
//
// If a collector is already registered under name it is unregistered first,
// so re-registering the same name replaces the old collector instead of
// panicking on a duplicate-descriptor error. A nil db is ignored. Registration
// failures are logged rather than panicking, and the collector is recorded in
// dbCollectors only after it has been registered.
func (m *MetricsImpl) RegisterDBCollector(db *sql.DB, name string) {
	if db == nil {
		// A stats collector over a nil *sql.DB would fail when scraped.
		return
	}

	m.dbCollectorMutex.Lock()
	defer m.dbCollectorMutex.Unlock()

	// Remove any previous collector for this name; its descriptors would
	// collide with the new one and its handle would otherwise be lost.
	if stale, ok := m.dbCollectors[name]; ok {
		m.registry.Unregister(stale)
		delete(m.dbCollectors, name)
	}

	collector := collectors.NewDBStatsCollector(db, name)
	if err := m.registry.Register(collector); err != nil {
		m.logger.Error("Failed to register DB stats collector", mlog.String("name", name), mlog.Err(err))
		return
	}
	m.dbCollectors[name] = collector
}
// UnregisterDBCollector removes the database stats collector previously
// registered under name, both from the registry and from dbCollectors.
// It does nothing when no collector is recorded for name. The db argument
// is not used; the lookup is by name only.
func (m *MetricsImpl) UnregisterDBCollector(db *sql.DB, name string) {
	m.dbCollectorMutex.Lock()
	defer m.dbCollectorMutex.Unlock()

	registered, found := m.dbCollectors[name]
	if !found {
		return
	}
	m.registry.Unregister(registered)
	delete(m.dbCollectors, name)
}