Skip to content

Commit

Permalink
Metrics: Wireup total requests api metrics
Browse files Browse the repository at this point in the history
The following wires up the outbound total requests API metrics. It uses
the juju/http request recorder.

We have to be extra careful about what we want to use as labels, we
don't want to log the path or the complete URL for a couple of reasons:

 1. Security around what can be in a URL; auth tokens et al
 2. We don't want to balloon the cardinality of the database.

With this in mind, we want to just request the host names for now, which
keeps the cardinality low.
  • Loading branch information
SimonRichardson committed Jun 18, 2021
1 parent 5735549 commit 4bb5edc
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 37 deletions.
1 change: 1 addition & 0 deletions apiserver/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func (a *admin) login(ctx context.Context, req params.LoginRequest, loginVersion
a.root,
httpRequestRecorderWrapper{
collector: a.srv.metricsCollector,
modelUUID: a.root.model.UUID(),
},
)
if err != nil {
Expand Down
30 changes: 13 additions & 17 deletions apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -462,21 +463,6 @@ func (srv *Server) getAgentToken() error {
return nil
}

// loggoWrapper is an io.Writer() that forwards the messages to a loggo.Logger.
// Unfortunately http takes a concrete stdlib log.Logger struct, and not an
// interface, so we can't just proxy all of the log levels without inspecting
// the string content. For now, we just want to get the messages into the log
// file.
type loggoWrapper struct {
logger loggo.Logger
level loggo.Level
}

func (w *loggoWrapper) Write(content []byte) (int, error) {
w.logger.Logf(w.level, "%s", string(content))
return len(content), nil
}

// logsinkMetricsCollectorWrapper defines a wrapper for exposing the essentials
// for the logsink api handler to interact with the metrics collector.
type logsinkMetricsCollectorWrapper struct {
Expand Down Expand Up @@ -507,16 +493,26 @@ func (w logsinkMetricsCollectorWrapper) LogReadCount(modelUUID, state string) pr
// essentials for the http request recorder.
type httpRequestRecorderWrapper struct {
collector *Collector
modelUUID string
}

// Record an outgoing request which produced an http.Response.
func (w httpRequestRecorderWrapper) Record(method string, url *url.URL, res *http.Response, rtt time.Duration) {

// Note: Do not log url.Path as REST queries _can_ include the name of the
// entities (charms, architectures, etc).
w.collector.TotalRequests.WithLabelValues(w.modelUUID, url.Host, strconv.FormatInt(int64(res.StatusCode), 10)).Inc()
if res.StatusCode >= 400 {
w.collector.TotalRequestErrors.WithLabelValues(w.modelUUID, url.Host).Inc()
}
w.collector.TotalRequestsDuration.WithLabelValues(w.modelUUID, url.Host).Observe(rtt.Seconds())
}

// Record an outgoing request which returned back an error.
func (w httpRequestRecorderWrapper) RecordError(method string, url *url.URL, err error) {

// Note: Do not log url.Path as REST queries _can_ include the name of the
// entities (charms, architectures, etc).
w.collector.TotalRequests.WithLabelValues(w.modelUUID, url.Host, "unknown").Inc()
w.collector.TotalRequestErrors.WithLabelValues(w.modelUUID, url.Host).Inc()
}

// loop is the main loop for the server.
Expand Down
88 changes: 75 additions & 13 deletions apiserver/apiservermetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,25 @@ const (
apiserverSubsystemNamespace = "apiserver"
)

// MetricLabelEndpoint defines a constant for the APIConnections abd
// PingFailureCount Labels
const MetricLabelEndpoint = "endpoint"
const (
// MetricLabelEndpoint defines a constant for the APIConnections and
// PingFailureCount Labels
MetricLabelEndpoint = "endpoint"

// MetricLabelModelUUID defines a constant for the PingFailureCount and
// LogWriteCount Labels
// Note: prometheus doesn't allow hyphens only underscores
MetricLabelModelUUID = "model_uuid"

// MetricLabelModelUUID defines a constant for the PingFailureCount and
// LogWriteCount Labels
// Note: prometheus doesn't allow hyphens only underscores
const MetricLabelModelUUID = "model_uuid"
// MetricLabelState defines a constant for the state Label
MetricLabelState = "state"

// MetricLabelState defines a constant for the LogWriteCount Label
const MetricLabelState = "state"
// MetricLabelHost defines a host constant for the Requests Label
MetricLabelHost = "host"

// MetricLabelStatus defines a status constant for the Requests Label
MetricLabelStatus = "status"
)

// MetricAPIConnectionsLabelNames defines a series of labels for the
// APIConnections metric.
Expand All @@ -46,16 +54,38 @@ var MetricLogLabelNames = []string{
MetricLabelState,
}

// MetricTotalRequestsWithStatusLabelNames defines a series of labels for the
// TotalRequests metric.
var MetricTotalRequestsWithStatusLabelNames = []string{
MetricLabelModelUUID,
MetricLabelHost,
MetricLabelStatus,
}

// MetricTotalRequestsLabelNames defines a series of labels for the
// TotalRequests metric.
var MetricTotalRequestsLabelNames = []string{
MetricLabelModelUUID,
MetricLabelHost,
}

// Collector is a prometheus.Collector that collects metrics based
// on apiserver status.
type Collector struct {
TotalConnections prometheus.Counter
TotalConnections prometheus.Counter

LoginAttempts prometheus.Gauge
APIConnections *prometheus.GaugeVec
APIRequestDuration *prometheus.SummaryVec
PingFailureCount *prometheus.CounterVec
LogWriteCount *prometheus.CounterVec
LogReadCount *prometheus.CounterVec

PingFailureCount *prometheus.CounterVec

LogWriteCount *prometheus.CounterVec
LogReadCount *prometheus.CounterVec

TotalRequests *prometheus.CounterVec
TotalRequestErrors *prometheus.CounterVec
TotalRequestsDuration *prometheus.SummaryVec
}

// NewMetricsCollector returns a new Collector.
Expand Down Expand Up @@ -91,12 +121,14 @@ func NewMetricsCollector() *Collector {
0.99: 0.001,
},
}, metricobserver.MetricLabelNames),

PingFailureCount: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: apiserverMetricsNamespace,
Subsystem: apiserverSubsystemNamespace,
Name: "ping_failure_count",
Help: "Current number of ping failures",
}, MetricPingFailureLabelNames),

LogWriteCount: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: apiserverMetricsNamespace,
Subsystem: apiserverSubsystemNamespace,
Expand All @@ -109,6 +141,30 @@ func NewMetricsCollector() *Collector {
Name: "log_read_count",
Help: "Current number of log reads",
}, MetricLogLabelNames),

TotalRequests: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: apiserverMetricsNamespace,
Subsystem: apiserverSubsystemNamespace,
Name: "outbound_requests_total",
Help: "Total number of http requests to outbound APIs",
}, MetricTotalRequestsWithStatusLabelNames),
TotalRequestErrors: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: apiserverMetricsNamespace,
Subsystem: apiserverSubsystemNamespace,
Name: "outbound_request_errors_total",
Help: "Total number of http request errors to outbound APIs",
}, MetricTotalRequestsLabelNames),
TotalRequestsDuration: prometheus.NewSummaryVec(prometheus.SummaryOpts{
Namespace: apiserverMetricsNamespace,
Subsystem: apiserverSubsystemNamespace,
Name: "outbound_request_duration_seconds",
Help: "Latency of outbound API requests in seconds.",
Objectives: map[float64]float64{
0.5: 0.05,
0.9: 0.01,
0.99: 0.001,
},
}, MetricTotalRequestsLabelNames),
}
}

Expand All @@ -121,6 +177,9 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
c.PingFailureCount.Describe(ch)
c.LogWriteCount.Describe(ch)
c.LogReadCount.Describe(ch)
c.TotalRequests.Describe(ch)
c.TotalRequestErrors.Describe(ch)
c.TotalRequestsDuration.Describe(ch)
}

// Collect is part of the prometheus.Collector interface.
Expand All @@ -132,4 +191,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) {
c.PingFailureCount.Collect(ch)
c.LogWriteCount.Collect(ch)
c.LogReadCount.Collect(ch)
c.TotalRequests.Collect(ch)
c.TotalRequestErrors.Collect(ch)
c.TotalRequestsDuration.Collect(ch)
}
16 changes: 15 additions & 1 deletion apiserver/apiservermetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,18 @@ func (s *apiservermetricsSuite) TestDescribe(c *gc.C) {
for desc := range ch {
descs = append(descs, desc)
}
c.Assert(descs, gc.HasLen, 7)
c.Assert(descs, gc.HasLen, 10)
c.Assert(descs[0].String(), gc.Matches, `.*fqName: "juju_apiserver_connections_total".*`)
c.Assert(descs[1].String(), gc.Matches, `.*fqName: "juju_apiserver_connections".*`)
c.Assert(descs[2].String(), gc.Matches, `.*fqName: "juju_apiserver_active_login_attempts".*`)
c.Assert(descs[3].String(), gc.Matches, `.*fqName: "juju_apiserver_request_duration_seconds".*`)
c.Assert(descs[4].String(), gc.Matches, `.*fqName: "juju_apiserver_ping_failure_count".*`)
c.Assert(descs[5].String(), gc.Matches, `.*fqName: "juju_apiserver_log_write_count".*`)
c.Assert(descs[6].String(), gc.Matches, `.*fqName: "juju_apiserver_log_read_count".*`)

c.Assert(descs[7].String(), gc.Matches, `.*fqName: "juju_apiserver_outbound_requests_total".*`)
c.Assert(descs[8].String(), gc.Matches, `.*fqName: "juju_apiserver_outbound_request_errors_total".*`)
c.Assert(descs[9].String(), gc.Matches, `.*fqName: "juju_apiserver_outbound_request_duration_seconds".*`)
}

func (s *apiservermetricsSuite) TestCollect(c *gc.C) {
Expand Down Expand Up @@ -83,6 +87,16 @@ func (s *apiservermetricsSuite) TestLabelNames(c *gc.C) {
labels: apiserver.MetricLogLabelNames,
checker: jc.IsTrue,
},
{
name: "total requests with status label names",
labels: apiserver.MetricTotalRequestsWithStatusLabelNames,
checker: jc.IsTrue,
},
{
name: "total requests label names",
labels: apiserver.MetricTotalRequestsLabelNames,
checker: jc.IsTrue,
},
{
name: "invalid names",
labels: []string{"model-uuid"},
Expand Down
8 changes: 2 additions & 6 deletions charmhub/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,15 @@ type loggingRequestRecorder struct {

// Record an outgoing request which produced an http.Response.
func (r loggingRequestRecorder) Record(method string, url *url.URL, res *http.Response, rtt time.Duration) {
status := "unknown"
if res != nil {
status = res.Status
}
if r.logger.IsTraceEnabled() {
r.logger.Tracef("request (method: %q, url: %q, status: %q, duration: %s)", method, url, status, rtt)
r.logger.Tracef("request (method: %q, host: %q, path: %q, status: %q, duration: %s)", method, url.Host, url.Path, res.Status, rtt)
}
}

// Record an outgoing request which returned back an error.
func (r loggingRequestRecorder) RecordError(method string, url *url.URL, err error) {
if r.logger.IsTraceEnabled() {
r.logger.Tracef("request error (method: %q, url: %q, err: %s)", method, url, err)
r.logger.Tracef("request error (method: %q, host: %q, path: %q, err: %s)", method, url.Host, url.Path, err)
}
}

Expand Down

0 comments on commit 4bb5edc

Please sign in to comment.