main commit
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-10-16 16:30:25 +09:00
parent 91c7e04474
commit 537e7b363f
1146 changed files with 45926 additions and 77196 deletions

View File

@@ -5,10 +5,9 @@ from . import (
process_collector, registry,
)
from .exposition import (
CONTENT_TYPE_LATEST, CONTENT_TYPE_PLAIN_0_0_4, CONTENT_TYPE_PLAIN_1_0_0,
delete_from_gateway, generate_latest, instance_ip_grouping_key,
make_asgi_app, make_wsgi_app, MetricsHandler, push_to_gateway,
pushadd_to_gateway, start_http_server, start_wsgi_server,
CONTENT_TYPE_LATEST, delete_from_gateway, generate_latest,
instance_ip_grouping_key, make_asgi_app, make_wsgi_app, MetricsHandler,
push_to_gateway, pushadd_to_gateway, start_http_server, start_wsgi_server,
write_to_textfile,
)
from .gc_collector import GC_COLLECTOR, GCCollector
@@ -34,8 +33,6 @@ __all__ = (
'enable_created_metrics',
'disable_created_metrics',
'CONTENT_TYPE_LATEST',
'CONTENT_TYPE_PLAIN_0_0_4',
'CONTENT_TYPE_PLAIN_1_0_0',
'generate_latest',
'MetricsHandler',
'make_wsgi_app',

View File

@@ -11,7 +11,7 @@ def make_asgi_app(registry: CollectorRegistry = REGISTRY, disable_compression: b
async def prometheus_app(scope, receive, send):
assert scope.get("type") == "http"
# Prepare parameters
params = parse_qs(scope.get('query_string', b'').decode("utf8"))
params = parse_qs(scope.get('query_string', b''))
accept_header = ",".join([
value.decode("utf8") for (name, value) in scope.get('headers')
if name.decode("utf8").lower() == 'accept'

View File

@@ -5,10 +5,9 @@ from .metrics_core import (
SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
)
from .registry import CollectorRegistry, REGISTRY
from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
from .samples import Exemplar, Sample, Timestamp
__all__ = (
'BucketSpan',
'CollectorRegistry',
'Counter',
'CounterMetricFamily',
@@ -22,7 +21,6 @@ __all__ = (
'Info',
'InfoMetricFamily',
'Metric',
'NativeHistogram',
'REGISTRY',
'Sample',
'StateSetMetricFamily',

View File

@@ -1,6 +1,5 @@
import base64
from contextlib import closing
from functools import partial
import gzip
from http.server import BaseHTTPRequestHandler
import os
@@ -20,12 +19,10 @@ from wsgiref.simple_server import make_server, WSGIRequestHandler, WSGIServer
from .openmetrics import exposition as openmetrics
from .registry import CollectorRegistry, REGISTRY
from .utils import floatToGoString, parse_version
from .utils import floatToGoString
__all__ = (
'CONTENT_TYPE_LATEST',
'CONTENT_TYPE_PLAIN_0_0_4',
'CONTENT_TYPE_PLAIN_1_0_0',
'delete_from_gateway',
'generate_latest',
'instance_ip_grouping_key',
@@ -39,13 +36,8 @@ __all__ = (
'write_to_textfile',
)
CONTENT_TYPE_PLAIN_0_0_4 = 'text/plain; version=0.0.4; charset=utf-8'
"""Content type of the compatibility format"""
CONTENT_TYPE_PLAIN_1_0_0 = 'text/plain; version=1.0.0; charset=utf-8'
"""Content type of the latest format"""
CONTENT_TYPE_LATEST = CONTENT_TYPE_PLAIN_1_0_0
CONTENT_TYPE_LATEST = 'text/plain; version=0.0.4; charset=utf-8'
"""Content type of the latest text format"""
class _PrometheusRedirectHandler(HTTPRedirectHandler):
@@ -126,24 +118,12 @@ def make_wsgi_app(registry: CollectorRegistry = REGISTRY, disable_compression: b
accept_header = environ.get('HTTP_ACCEPT')
accept_encoding_header = environ.get('HTTP_ACCEPT_ENCODING')
params = parse_qs(environ.get('QUERY_STRING', ''))
method = environ['REQUEST_METHOD']
if method == 'OPTIONS':
status = '200 OK'
headers = [('Allow', 'OPTIONS,GET')]
output = b''
elif method != 'GET':
status = '405 Method Not Allowed'
headers = [('Allow', 'OPTIONS,GET')]
output = '# HTTP {}: {}; use OPTIONS or GET\n'.format(status, method).encode()
elif environ['PATH_INFO'] == '/favicon.ico':
if environ['PATH_INFO'] == '/favicon.ico':
# Serve empty response for browsers
status = '200 OK'
headers = []
headers = [('', '')]
output = b''
else:
# Note: For backwards compatibility, the URI path for GET is not
# constrained to the documented /metrics, but any path is allowed.
# Bake output
status, headers, output = _bake_output(registry, accept_header, accept_encoding_header, params, disable_compression)
# Return output
@@ -174,63 +154,12 @@ def _get_best_family(address, port):
# binding an ipv6 address is requested.
# This function is based on what upstream python did for http.server
# in https://github.com/python/cpython/pull/11767
infos = socket.getaddrinfo(address, port, type=socket.SOCK_STREAM, flags=socket.AI_PASSIVE)
infos = socket.getaddrinfo(address, port)
family, _, _, _, sockaddr = next(iter(infos))
return family, sockaddr[0]
def _get_ssl_ctx(
certfile: str,
keyfile: str,
protocol: int,
cafile: Optional[str] = None,
capath: Optional[str] = None,
client_auth_required: bool = False,
) -> ssl.SSLContext:
"""Load context supports SSL."""
ssl_cxt = ssl.SSLContext(protocol=protocol)
if cafile is not None or capath is not None:
try:
ssl_cxt.load_verify_locations(cafile, capath)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load CA certificate chain from file "
f"{cafile!r} or directory {capath!r}: {msg}")
else:
try:
ssl_cxt.load_default_certs(purpose=ssl.Purpose.CLIENT_AUTH)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load default CA certificate chain: {msg}")
if client_auth_required:
ssl_cxt.verify_mode = ssl.CERT_REQUIRED
try:
ssl_cxt.load_cert_chain(certfile=certfile, keyfile=keyfile)
except IOError as exc:
exc_type = type(exc)
msg = str(exc)
raise exc_type(f"Cannot load server certificate file {certfile!r} or "
f"its private key file {keyfile!r}: {msg}")
return ssl_cxt
def start_wsgi_server(
port: int,
addr: str = '0.0.0.0',
registry: CollectorRegistry = REGISTRY,
certfile: Optional[str] = None,
keyfile: Optional[str] = None,
client_cafile: Optional[str] = None,
client_capath: Optional[str] = None,
protocol: int = ssl.PROTOCOL_TLS_SERVER,
client_auth_required: bool = False,
) -> Tuple[WSGIServer, threading.Thread]:
def start_wsgi_server(port: int, addr: str = '0.0.0.0', registry: CollectorRegistry = REGISTRY) -> None:
"""Starts a WSGI server for prometheus metrics as a daemon thread."""
class TmpServer(ThreadingWSGIServer):
@@ -239,51 +168,30 @@ def start_wsgi_server(
TmpServer.address_family, addr = _get_best_family(addr, port)
app = make_wsgi_app(registry)
httpd = make_server(addr, port, app, TmpServer, handler_class=_SilentHandler)
if certfile and keyfile:
context = _get_ssl_ctx(certfile, keyfile, protocol, client_cafile, client_capath, client_auth_required)
httpd.socket = context.wrap_socket(httpd.socket, server_side=True)
t = threading.Thread(target=httpd.serve_forever)
t.daemon = True
t.start()
return httpd, t
start_http_server = start_wsgi_server
def generate_latest(registry: CollectorRegistry = REGISTRY, escaping: str = openmetrics.UNDERSCORES) -> bytes:
"""
Generates the exposition format using the basic Prometheus text format.
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
"""Returns the metrics from the registry in latest text format as a string."""
Params:
registry: CollectorRegistry to export data from.
escaping: Escaping scheme used for metric and label names.
Returns: UTF-8 encoded string containing the metrics in text format.
"""
def sample_line(samples):
if samples.labels:
labelstr = '{0}'.format(','.join(
# Label values always support UTF-8
def sample_line(line):
if line.labels:
labelstr = '{{{0}}}'.format(','.join(
['{}="{}"'.format(
openmetrics.escape_label_name(k, escaping), openmetrics._escape(v, openmetrics.ALLOWUTF8, False))
for k, v in sorted(samples.labels.items())]))
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(line.labels.items())]))
else:
labelstr = ''
timestamp = ''
if samples.timestamp is not None:
if line.timestamp is not None:
# Convert to milliseconds.
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
if escaping != openmetrics.ALLOWUTF8 or openmetrics._is_valid_legacy_metric_name(samples.name):
if labelstr:
labelstr = '{{{0}}}'.format(labelstr)
return f'{openmetrics.escape_metric_name(samples.name, escaping)}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
maybe_comma = ''
if labelstr:
maybe_comma = ','
return f'{{{openmetrics.escape_metric_name(samples.name, escaping)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
timestamp = f' {int(float(line.timestamp) * 1000):d}'
return f'{line.name}{labelstr} {floatToGoString(line.value)}{timestamp}\n'
output = []
for metric in registry.collect():
@@ -306,8 +214,8 @@ def generate_latest(registry: CollectorRegistry = REGISTRY, escaping: str = open
mtype = 'untyped'
output.append('# HELP {} {}\n'.format(
openmetrics.escape_metric_name(mname, escaping), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(mname, escaping)} {mtype}\n')
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {mname} {mtype}\n')
om_samples: Dict[str, List[str]] = {}
for s in metric.samples:
@@ -323,79 +231,20 @@ def generate_latest(registry: CollectorRegistry = REGISTRY, escaping: str = open
raise
for suffix, lines in sorted(om_samples.items()):
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix, escaping),
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix, escaping)} gauge\n')
output.append('# HELP {}{} {}\n'.format(metric.name, suffix,
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {metric.name}{suffix} gauge\n')
output.extend(lines)
return ''.join(output).encode('utf-8')
def choose_encoder(accept_header: str) -> Tuple[Callable[[CollectorRegistry], bytes], str]:
# Python client library accepts a narrower range of content-types than
# Prometheus does.
accept_header = accept_header or ''
escaping = openmetrics.UNDERSCORES
for accepted in accept_header.split(','):
if accepted.split(';')[0].strip() == 'application/openmetrics-text':
toks = accepted.split(';')
version = _get_version(toks)
escaping = _get_escaping(toks)
# Only return an escaping header if we have a good version and
# mimetype.
if not version:
return (partial(openmetrics.generate_latest, escaping=openmetrics.UNDERSCORES, version="1.0.0"), openmetrics.CONTENT_TYPE_LATEST)
if version and parse_version(version) >= (1, 0, 0):
return (partial(openmetrics.generate_latest, escaping=escaping, version=version),
f'application/openmetrics-text; version={version}; charset=utf-8; escaping=' + str(escaping))
elif accepted.split(';')[0].strip() == 'text/plain':
toks = accepted.split(';')
version = _get_version(toks)
escaping = _get_escaping(toks)
# Only return an escaping header if we have a good version and
# mimetype.
if version and parse_version(version) >= (1, 0, 0):
return (partial(generate_latest, escaping=escaping),
CONTENT_TYPE_LATEST + '; escaping=' + str(escaping))
return generate_latest, CONTENT_TYPE_PLAIN_0_0_4
def _get_version(accept_header: List[str]) -> str:
"""Return the version tag from the Accept header.
If no version is specified, returns empty string."""
for tok in accept_header:
if '=' not in tok:
continue
key, value = tok.strip().split('=', 1)
if key == 'version':
return value
return ""
def _get_escaping(accept_header: List[str]) -> str:
    """Return the ``escaping=`` scheme from the Accept header tokens.

    Defaults to UNDERSCORES when no escaping parameter is present or its
    value is not one of the recognised schemes."""
    recognised = (
        openmetrics.ALLOWUTF8,
        openmetrics.UNDERSCORES,
        openmetrics.DOTS,
        openmetrics.VALUES,
    )
    for token in accept_header:
        if '=' not in token:
            continue
        key, value = token.strip().split('=', 1)
        if key != 'escaping':
            continue
        # First escaping parameter wins; unknown values fall back to the default.
        if value in recognised:
            return value
        return openmetrics.UNDERSCORES
    return openmetrics.UNDERSCORES
return (openmetrics.generate_latest,
openmetrics.CONTENT_TYPE_LATEST)
return generate_latest, CONTENT_TYPE_LATEST
def gzip_accepted(accept_encoding_header: str) -> bool:
@@ -444,34 +293,20 @@ class MetricsHandler(BaseHTTPRequestHandler):
return MyMetricsHandler
def write_to_textfile(path: str, registry: CollectorRegistry, escaping: str = openmetrics.ALLOWUTF8, tmpdir: Optional[str] = None) -> None:
def write_to_textfile(path: str, registry: CollectorRegistry) -> None:
"""Write metrics to the given path.
This is intended for use with the Node exporter textfile collector.
The path must end in .prom for the textfile collector to process it.
The path must end in .prom for the textfile collector to process it."""
tmppath = f'{path}.{os.getpid()}.{threading.current_thread().ident}'
with open(tmppath, 'wb') as f:
f.write(generate_latest(registry))
An optional tmpdir parameter can be set to determine where the
metrics will be temporarily written to. If not set, it will be in
the same directory as the .prom file. If provided, the path MUST be
on the same filesystem."""
if tmpdir is not None:
filename = os.path.basename(path)
tmppath = f'{os.path.join(tmpdir, filename)}.{os.getpid()}.{threading.current_thread().ident}'
# rename(2) is atomic but fails on Windows if the destination file exists
if os.name == 'nt':
os.replace(tmppath, path)
else:
tmppath = f'{path}.{os.getpid()}.{threading.current_thread().ident}'
try:
with open(tmppath, 'wb') as f:
f.write(generate_latest(registry, escaping))
# rename(2) is atomic but fails on Windows if the destination file exists
if os.name == 'nt':
os.replace(tmppath, path)
else:
os.rename(tmppath, path)
except Exception:
if os.path.exists(tmppath):
os.remove(tmppath)
raise
os.rename(tmppath, path)
def _make_handler(
@@ -572,7 +407,7 @@ def tls_auth_handler(
The default protocol (ssl.PROTOCOL_TLS_CLIENT) will also enable
ssl.CERT_REQUIRED and SSLContext.check_hostname by default. This can be
disabled by setting insecure_skip_verify to True.
Both this handler and the TLS feature on pushgateway are experimental."""
context = ssl.SSLContext(protocol=protocol)
if cafile is not None:
@@ -729,7 +564,7 @@ def _use_gateway(
handler(
url=url, method=method, timeout=timeout,
headers=[('Content-Type', CONTENT_TYPE_PLAIN_0_0_4)], data=data,
headers=[('Content-Type', CONTENT_TYPE_LATEST)], data=data,
)()

View File

@@ -6,25 +6,22 @@ from typing import (
Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple,
Type, TypeVar, Union,
)
import warnings
from . import values # retain this import style for testability
from .context_managers import ExceptionCounter, InprogressTracker, Timer
from .metrics_core import Metric
from .metrics_core import (
Metric, METRIC_LABEL_NAME_RE, METRIC_NAME_RE,
RESERVED_METRIC_LABEL_NAME_RE,
)
from .registry import Collector, CollectorRegistry, REGISTRY
from .samples import Exemplar, Sample
from .utils import floatToGoString, INF
from .validation import (
_validate_exemplar, _validate_labelnames, _validate_metric_name,
)
T = TypeVar('T', bound='MetricWrapperBase')
F = TypeVar("F", bound=Callable[..., Any])
def _build_full_name(metric_type, name, namespace, subsystem, unit):
if not name:
raise ValueError('Metric name should not be empty')
full_name = ''
if namespace:
full_name += namespace + '_'
@@ -40,6 +37,31 @@ def _build_full_name(metric_type, name, namespace, subsystem, unit):
return full_name
def _validate_labelname(l):
    """Raise ValueError when *l* is not a legal, non-reserved label name."""
    if METRIC_LABEL_NAME_RE.match(l) is None:
        raise ValueError('Invalid label metric name: ' + l)
    if RESERVED_METRIC_LABEL_NAME_RE.match(l) is not None:
        raise ValueError('Reserved label metric name: ' + l)
def _validate_labelnames(cls, labelnames):
    """Validate *labelnames* and return them frozen as a tuple.

    Rejects names that fail the legacy pattern, use the reserved ``__``
    prefix, or collide with the metric class's own reserved names."""
    names = tuple(labelnames)
    for name in names:
        _validate_labelname(name)
        if name in cls._reserved_labelnames:
            raise ValueError('Reserved label metric name: ' + name)
    return names
def _validate_exemplar(exemplar):
runes = 0
for k, v in exemplar.items():
_validate_labelname(k)
runes += len(k)
runes += len(v)
if runes > 128:
raise ValueError('Exemplar labels have %d UTF-8 characters, exceeding the limit of 128')
def _get_use_created() -> bool:
return os.environ.get("PROMETHEUS_DISABLE_CREATED_SERIES", 'False').lower() not in ('true', '1', 't')
@@ -88,8 +110,8 @@ class MetricWrapperBase(Collector):
def collect(self) -> Iterable[Metric]:
metric = self._get_metric()
for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
for suffix, labels, value, timestamp, exemplar in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar)
return [metric]
def __str__(self) -> str:
@@ -116,7 +138,8 @@ class MetricWrapperBase(Collector):
self._documentation = documentation
self._unit = unit
_validate_metric_name(self._name)
if not METRIC_NAME_RE.match(self._name):
raise ValueError('Invalid metric name: ' + self._name)
if self._is_parent():
# Prepare the fields needed for child metrics.
@@ -187,11 +210,6 @@ class MetricWrapperBase(Collector):
return self._metrics[labelvalues]
def remove(self, *labelvalues: Any) -> None:
if 'prometheus_multiproc_dir' in os.environ or 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
warnings.warn(
"Removal of labels has not been implemented in multi-process mode yet.",
UserWarning)
if not self._labelnames:
raise ValueError('No label names were set when constructing %s' % self)
@@ -200,15 +218,10 @@ class MetricWrapperBase(Collector):
raise ValueError('Incorrect label count (expected %d, got %s)' % (len(self._labelnames), labelvalues))
labelvalues = tuple(str(l) for l in labelvalues)
with self._lock:
if labelvalues in self._metrics:
del self._metrics[labelvalues]
del self._metrics[labelvalues]
def clear(self) -> None:
"""Remove all labelsets from the metric"""
if 'prometheus_multiproc_dir' in os.environ or 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
warnings.warn(
"Clearing labels has not been implemented in multi-process mode yet",
UserWarning)
with self._lock:
self._metrics = {}
@@ -223,8 +236,8 @@ class MetricWrapperBase(Collector):
metrics = self._metrics.copy()
for labels, metric in metrics.items():
series_labels = list(zip(self._labelnames, labels))
for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)
for suffix, sample_labels, value, timestamp, exemplar in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar)
def _child_samples(self) -> Iterable[Sample]: # pragma: no cover
raise NotImplementedError('_child_samples() must be implemented by %r' % self)
@@ -269,12 +282,6 @@ class Counter(MetricWrapperBase):
# Count only one type of exception
with c.count_exceptions(ValueError):
pass
You can also reset the counter to zero in case your logical "process" restarts
without restarting the actual python process.
c.reset()
"""
_type = 'counter'
@@ -293,11 +300,6 @@ class Counter(MetricWrapperBase):
_validate_exemplar(exemplar)
self._value.set_exemplar(Exemplar(exemplar, amount, time.time()))
def reset(self) -> None:
"""Reset the counter to zero. Use this when a logical process restarts without restarting the actual python process."""
self._value.set(0)
self._created = time.time()
def count_exceptions(self, exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]] = Exception) -> ExceptionCounter:
"""Count exceptions in a block of code or function.
@@ -682,8 +684,6 @@ class Info(MetricWrapperBase):
if self._labelname_set.intersection(val.keys()):
raise ValueError('Overlapping labels for Info metric, metric: {} child: {}'.format(
self._labelnames, val))
if any(i is None for i in val.values()):
raise ValueError('Label value cannot be None')
with self._lock:
self._value = dict(val)

View File

@@ -1,12 +1,15 @@
import re
from typing import Dict, List, Optional, Sequence, Tuple, Union
from .samples import Exemplar, NativeHistogram, Sample, Timestamp
from .validation import _validate_metric_name
from .samples import Exemplar, Sample, Timestamp
METRIC_TYPES = (
'counter', 'gauge', 'summary', 'histogram',
'gaugehistogram', 'unknown', 'info', 'stateset',
)
METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$')
METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$')
class Metric:
@@ -21,7 +24,8 @@ class Metric:
def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
if unit and not name.endswith("_" + unit):
name += "_" + unit
_validate_metric_name(name)
if not METRIC_NAME_RE.match(name):
raise ValueError('Invalid metric name: ' + name)
self.name: str = name
self.documentation: str = documentation
self.unit: str = unit
@@ -32,11 +36,11 @@ class Metric:
self.type: str = typ
self.samples: List[Sample] = []
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None:
"""Add a sample to the metric.
Internal-only, do not use."""
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))
self.samples.append(Sample(name, labels, value, timestamp, exemplar))
def __eq__(self, other: object) -> bool:
return (isinstance(other, Metric)
@@ -112,7 +116,6 @@ class CounterMetricFamily(Metric):
labels: Optional[Sequence[str]] = None,
created: Optional[float] = None,
unit: str = '',
exemplar: Optional[Exemplar] = None,
):
# Glue code for pre-OpenMetrics metrics.
if name.endswith('_total'):
@@ -124,14 +127,13 @@ class CounterMetricFamily(Metric):
labels = []
self._labelnames = tuple(labels)
if value is not None:
self.add_metric([], value, created, exemplar=exemplar)
self.add_metric([], value, created)
def add_metric(self,
labels: Sequence[str],
value: float,
created: Optional[float] = None,
timestamp: Optional[Union[Timestamp, float]] = None,
exemplar: Optional[Exemplar] = None,
) -> None:
"""Add a metric to the metric family.
@@ -140,7 +142,7 @@ class CounterMetricFamily(Metric):
value: The value of the metric
created: Optional unix timestamp the child was created at.
"""
self.samples.append(Sample(self.name + '_total', dict(zip(self._labelnames, labels)), value, timestamp, exemplar))
self.samples.append(Sample(self.name + '_total', dict(zip(self._labelnames, labels)), value, timestamp))
if created is not None:
self.samples.append(Sample(self.name + '_created', dict(zip(self._labelnames, labels)), created, timestamp))
@@ -282,6 +284,7 @@ class HistogramMetricFamily(Metric):
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))
class GaugeHistogramMetricFamily(Metric):
"""A single gauge histogram and its samples.

View File

@@ -93,7 +93,7 @@ class MultiProcessCollector:
buckets = defaultdict(lambda: defaultdict(float))
samples_setdefault = samples.setdefault
for s in metric.samples:
name, labels, value, timestamp, exemplar, native_histogram_value = s
name, labels, value, timestamp, exemplar = s
if metric.type == 'gauge':
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
if metric._multiprocess_mode in ('min', 'livemin'):

View File

@@ -1,287 +1,72 @@
#!/usr/bin/env python
from io import StringIO
from sys import maxunicode
from typing import Callable
from ..utils import floatToGoString, parse_version
from ..validation import (
_is_valid_legacy_labelname, _is_valid_legacy_metric_name,
)
from ..utils import floatToGoString
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
"""Content type of the latest OpenMetrics 1.0 text format"""
CONTENT_TYPE_LATEST_2_0 = 'application/openmetrics-text; version=2.0.0; charset=utf-8'
"""Content type of the OpenMetrics 2.0 text format"""
ESCAPING_HEADER_TAG = 'escaping'
ALLOWUTF8 = 'allow-utf-8'
UNDERSCORES = 'underscores'
DOTS = 'dots'
VALUES = 'values'
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=0.0.1; charset=utf-8'
"""Content type of the latest OpenMetrics text format"""
def _is_valid_exemplar_metric(metric, sample):
if metric.type == 'counter' and sample.name.endswith('_total'):
return True
if metric.type in ('gaugehistogram') and sample.name.endswith('_bucket'):
return True
if metric.type in ('histogram') and sample.name.endswith('_bucket') or sample.name == metric.name:
if metric.type in ('histogram', 'gaugehistogram') and sample.name.endswith('_bucket'):
return True
return False
def _compose_exemplar_string(metric, sample, exemplar):
    """Render *exemplar* as its OpenMetrics text suffix: ' # {labels} value [ts]'.

    Raises ValueError when the sample is not a series that may carry
    exemplars (as decided by _is_valid_exemplar_metric)."""
    if not _is_valid_exemplar_metric(metric, sample):
        raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter")
    pairs = []
    for key, val in sorted(exemplar.labels.items()):
        # Backslash-escape backslash, newline and double quote in label values.
        escaped = val.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')
        pairs.append('{}="{}"'.format(key, escaped))
    labels = '{' + ','.join(pairs) + '}'
    value = floatToGoString(exemplar.value)
    if exemplar.timestamp is None:
        return ' # {} {}'.format(labels, value)
    return ' # {} {} {}'.format(labels, value, exemplar.timestamp)
def generate_latest(registry, escaping=UNDERSCORES, version="1.0.0"):
def generate_latest(registry):
'''Returns the metrics from the registry in latest text format as a string.'''
output = []
for metric in registry.collect():
try:
mname = metric.name
output.append('# HELP {} {}\n'.format(
escape_metric_name(mname, escaping), _escape(metric.documentation, ALLOWUTF8, _is_legacy_labelname_rune)))
output.append(f'# TYPE {escape_metric_name(mname, escaping)} {metric.type}\n')
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')))
output.append(f'# TYPE {mname} {metric.type}\n')
if metric.unit:
output.append(f'# UNIT {escape_metric_name(mname, escaping)} {metric.unit}\n')
output.append(f'# UNIT {mname} {metric.unit}\n')
for s in metric.samples:
if escaping == ALLOWUTF8 and not _is_valid_legacy_metric_name(s.name):
labelstr = escape_metric_name(s.name, escaping)
if s.labels:
labelstr += ','
if s.labels:
labelstr = '{{{0}}}'.format(','.join(
['{}="{}"'.format(
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(s.labels.items())]))
else:
labelstr = ''
if s.labels:
items = sorted(s.labels.items())
# Label values always support UTF-8
labelstr += ','.join(
['{}="{}"'.format(
escape_label_name(k, escaping), _escape(v, ALLOWUTF8, _is_legacy_labelname_rune))
for k, v in items])
if labelstr:
labelstr = "{" + labelstr + "}"
if s.exemplar:
exemplarstr = _compose_exemplar_string(metric, s, s.exemplar)
if not _is_valid_exemplar_metric(metric, s):
raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter")
labels = '{{{0}}}'.format(','.join(
['{}="{}"'.format(
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(s.exemplar.labels.items())]))
if s.exemplar.timestamp is not None:
exemplarstr = ' # {} {} {}'.format(
labels,
floatToGoString(s.exemplar.value),
s.exemplar.timestamp,
)
else:
exemplarstr = ' # {} {}'.format(
labels,
floatToGoString(s.exemplar.value),
)
else:
exemplarstr = ''
timestamp = ''
if s.timestamp is not None:
timestamp = f' {s.timestamp}'
# Skip native histogram samples entirely if version < 2.0.0
if s.native_histogram and parse_version(version) < (2, 0, 0):
continue
native_histogram = ''
negative_spans = ''
negative_deltas = ''
positive_spans = ''
positive_deltas = ''
if s.native_histogram:
# Initialize basic nh template
nh_sample_template = '{{count:{},sum:{},schema:{},zero_threshold:{},zero_count:{}'
args = [
s.native_histogram.count_value,
s.native_histogram.sum_value,
s.native_histogram.schema,
s.native_histogram.zero_threshold,
s.native_histogram.zero_count,
]
# If there are neg spans, append them and the neg deltas to the template and args
if s.native_histogram.neg_spans:
negative_spans = ','.join([f'{ns[0]}:{ns[1]}' for ns in s.native_histogram.neg_spans])
negative_deltas = ','.join(str(nd) for nd in s.native_histogram.neg_deltas)
nh_sample_template += ',negative_spans:[{}]'
args.append(negative_spans)
nh_sample_template += ',negative_deltas:[{}]'
args.append(negative_deltas)
# If there are pos spans, append them and the pos spans to the template and args
if s.native_histogram.pos_spans:
positive_spans = ','.join([f'{ps[0]}:{ps[1]}' for ps in s.native_histogram.pos_spans])
positive_deltas = ','.join(f'{pd}' for pd in s.native_histogram.pos_deltas)
nh_sample_template += ',positive_spans:[{}]'
args.append(positive_spans)
nh_sample_template += ',positive_deltas:[{}]'
args.append(positive_deltas)
# Add closing brace
nh_sample_template += '}}'
# Format the template with the args
native_histogram = nh_sample_template.format(*args)
if s.native_histogram.nh_exemplars:
for nh_ex in s.native_histogram.nh_exemplars:
nh_exemplarstr = _compose_exemplar_string(metric, s, nh_ex)
exemplarstr += nh_exemplarstr
value = ''
if s.native_histogram:
value = native_histogram
elif s.value is not None:
value = floatToGoString(s.value)
if (escaping != ALLOWUTF8) or _is_valid_legacy_metric_name(s.name):
output.append('{}{} {}{}{}\n'.format(
_escape(s.name, escaping, _is_legacy_labelname_rune),
labelstr,
value,
timestamp,
exemplarstr
))
else:
output.append('{} {}{}{}\n'.format(
labelstr,
value,
timestamp,
exemplarstr
))
output.append('{}{} {}{}{}\n'.format(
s.name,
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
except Exception as exception:
exception.args = (exception.args or ('',)) + (metric,)
raise
output.append('# EOF\n')
return ''.join(output).encode('utf-8')
def escape_metric_name(s: str, escaping: str = UNDERSCORES) -> str:
    """Escape a metric name according to the requested escaping scheme.

    Under ALLOWUTF8 the (backslash-escaped) name is additionally wrapped
    in double quotes when it does not conform to the legacy Prometheus
    character set.  Empty names are returned unchanged."""
    if not s:
        return s
    # _escape is pure; for an unrecognised scheme it returns s unchanged.
    escaped = _escape(s, escaping, _is_legacy_metric_rune)
    if escaping == ALLOWUTF8:
        if _is_valid_legacy_metric_name(s):
            return escaped
        return '"{}"'.format(escaped)
    if escaping in (UNDERSCORES, VALUES):
        return s if _is_valid_legacy_metric_name(s) else escaped
    if escaping == DOTS:
        return escaped
    return s
def escape_label_name(s: str, escaping: str = UNDERSCORES) -> str:
    """Escape a label name according to the requested escaping scheme.

    Under ALLOWUTF8 the (backslash-escaped) name is additionally wrapped
    in double quotes when it does not conform to the legacy Prometheus
    character set.  Empty names are returned unchanged."""
    if not s:
        return s
    # _escape is pure; for an unrecognised scheme it returns s unchanged.
    escaped = _escape(s, escaping, _is_legacy_labelname_rune)
    if escaping == ALLOWUTF8:
        if _is_valid_legacy_labelname(s):
            return escaped
        return '"{}"'.format(escaped)
    if escaping in (UNDERSCORES, VALUES):
        return s if _is_valid_legacy_labelname(s) else escaped
    if escaping == DOTS:
        return escaped
    return s
def _escape(s: str, escaping: str, valid_rune_fn: Callable[[str, int], bool]) -> str:
    """Escape *s* under the given escaping scheme.

    valid_rune_fn takes a character and its index in the containing string
    and reports whether that character is legal at that position.

    ALLOWUTF8 performs backslash escaping on backslash, newline, and
    double-quote only; the other schemes rewrite illegal characters.
    """
    if escaping == ALLOWUTF8:
        return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')
    if escaping == UNDERSCORES:
        # Every illegal rune collapses to a single underscore.
        return ''.join(
            ch if valid_rune_fn(ch, idx) else '_'
            for idx, ch in enumerate(s)
        )
    if escaping == DOTS:
        pieces = []
        for idx, ch in enumerate(s):
            if ch == '_':
                pieces.append('__')
            elif ch == '.':
                pieces.append('_dot_')
            elif valid_rune_fn(ch, idx):
                pieces.append(ch)
            else:
                pieces.append('__')
        return ''.join(pieces)
    if escaping == VALUES:
        # "U__"-prefixed value encoding: underscores doubled, illegal runes
        # written as _<hex codepoint>_, unencodable runes as _FFFD_.
        pieces = ["U__"]
        for idx, ch in enumerate(s):
            if ch == '_':
                pieces.append('__')
            elif valid_rune_fn(ch, idx):
                pieces.append(ch)
            elif not _is_valid_utf8(ch):
                pieces.append('_FFFD_')
            else:
                pieces.append('_' + format(ord(ch), 'x') + '_')
        return ''.join(pieces)
    # Unknown escaping scheme: return the input unchanged.
    return s
def _is_legacy_metric_rune(b: str, i: int) -> bool:
    """Metric names allow every legacy label-name character plus ':'."""
    if _is_legacy_labelname_rune(b, i):
        return True
    return b == ':'
def _is_legacy_labelname_rune(b: str, i: int) -> bool:
    """Return True if character *b* is legal at position *i* of a legacy label name.

    Legal characters are [a-zA-Z_] at any position, and [0-9] at any
    position except the first.  Raises ValueError when *b* is not a
    single character.
    """
    if len(b) != 1:
        raise ValueError("Input 'b' must be a single character.")
    if b == '_':
        return True
    if 'a' <= b <= 'z' or 'A' <= b <= 'Z':
        return True
    return i > 0 and '0' <= b <= '9'
# Unicode surrogate code points can never be encoded as UTF-8.
_SURROGATE_MIN = 0xD800
_SURROGATE_MAX = 0xDFFF


def _is_valid_utf8(s: str) -> bool:
    """Return True iff the single character *s* is encodable as UTF-8.

    Every Python code point (0..sys.maxunicode) is valid except the
    surrogate range U+D800..U+DFFF.
    """
    return not (_SURROGATE_MIN <= ord(s) <= _SURROGATE_MAX)

View File

@@ -5,14 +5,9 @@ import io as StringIO
import math
import re
from ..metrics_core import Metric
from ..parser import (
_last_unquoted_char, _next_unquoted_char, _parse_value, _split_quoted,
_unquote_unescape, parse_labels,
)
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
from ..metrics_core import Metric, METRIC_LABEL_NAME_RE
from ..samples import Exemplar, Sample, Timestamp
from ..utils import floatToGoString
from ..validation import _is_valid_legacy_metric_name, _validate_metric_name
def text_string_to_metric_families(text):
@@ -78,6 +73,16 @@ def _unescape_help(text):
return ''.join(result)
def _parse_value(value):
value = ''.join(value)
if value != value.strip() or '_' in value:
raise ValueError(f"Invalid value: {value!r}")
try:
return int(value)
except ValueError:
return float(value)
def _parse_timestamp(timestamp):
timestamp = ''.join(timestamp)
if not timestamp:
@@ -108,31 +113,165 @@ def _is_character_escaped(s, charpos):
return num_bslashes % 2 == 1
def _parse_labels_with_state_machine(text):
    """Parse an OpenMetrics label set, one character at a time.

    ``text`` is everything after the opening ``{``.  Returns a tuple of
    (labels dict, characters consumed up to and including the closing
    ``}``) so the caller can locate the rest of the line.  Raises
    ValueError on malformed input, bad label names, or duplicates.
    """
    # The { has already been parsed.
    state = 'startoflabelname'
    labelname = []
    labelvalue = []
    labels = {}
    labels_len = 0
    for char in text:
        if state == 'startoflabelname':
            if char == '}':
                # Empty label set: "{}".
                state = 'endoflabels'
            else:
                state = 'labelname'
                labelname.append(char)
        elif state == 'labelname':
            if char == '=':
                state = 'labelvaluequote'
            else:
                labelname.append(char)
        elif state == 'labelvaluequote':
            # Label values must be double-quoted.
            if char == '"':
                state = 'labelvalue'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'labelvalue':
            if char == '\\':
                state = 'labelvalueslash'
            elif char == '"':
                # Closing quote: commit the accumulated name/value pair.
                ln = ''.join(labelname)
                if not METRIC_LABEL_NAME_RE.match(ln):
                    raise ValueError("Invalid line, bad label name: " + text)
                if ln in labels:
                    raise ValueError("Invalid line, duplicate label name: " + text)
                labels[ln] = ''.join(labelvalue)
                labelname = []
                labelvalue = []
                state = 'endoflabelvalue'
            else:
                labelvalue.append(char)
        elif state == 'endoflabelvalue':
            if char == ',':
                state = 'labelname'
            elif char == '}':
                state = 'endoflabels'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'labelvalueslash':
            # Only \\, \n and \" are recognized escapes; any other
            # backslash sequence is kept verbatim.
            state = 'labelvalue'
            if char == '\\':
                labelvalue.append('\\')
            elif char == 'n':
                labelvalue.append('\n')
            elif char == '"':
                labelvalue.append('"')
            else:
                labelvalue.append('\\' + char)
        elif state == 'endoflabels':
            # A space after '}' separates the labels from the value; stop
            # before counting it.
            if char == ' ':
                break
            else:
                raise ValueError("Invalid line: " + text)
        labels_len += 1
    return labels, labels_len
def _parse_labels(text):
    """Parse a ``name="value",...`` label section (without braces) into a dict.

    Label values are unescaped.  Any malformed input (missing '=', missing
    quotes, stray commas, invalid or duplicate label names) raises
    ValueError, re-wrapped as ``Invalid labels: <text>``.
    """
    labels = {}
    # Raise error if we don't have valid labels
    if text and "=" not in text:
        raise ValueError
    # Copy original labels
    sub_labels = text
    try:
        # Process one label at a time
        while sub_labels:
            # The label name is before the equal
            value_start = sub_labels.index("=")
            label_name = sub_labels[:value_start]
            sub_labels = sub_labels[value_start + 1:]
            # Check for missing quotes
            if not sub_labels or sub_labels[0] != '"':
                raise ValueError
            # The first quote is guaranteed to be after the equal
            value_substr = sub_labels[1:]
            # Check for extra commas
            if not label_name or label_name[0] == ',':
                raise ValueError
            if not value_substr or value_substr[-1] == ',':
                raise ValueError
            # Find the last unescaped quote
            i = 0
            while i < len(value_substr):
                i = value_substr.index('"', i)
                if not _is_character_escaped(value_substr[:i], i):
                    break
                i += 1
            # The label value is between the first and last quote
            quote_end = i + 1
            label_value = sub_labels[1:quote_end]
            # Replace escaping if needed
            if "\\" in label_value:
                label_value = _replace_escaping(label_value)
            if not METRIC_LABEL_NAME_RE.match(label_name):
                raise ValueError("invalid line, bad label name: " + text)
            if label_name in labels:
                raise ValueError("invalid line, duplicate label name: " + text)
            labels[label_name] = label_value
            # Remove the processed label from the sub-slice for next iteration
            sub_labels = sub_labels[quote_end + 1:]
            if sub_labels.startswith(","):
                next_comma = 1
            else:
                next_comma = 0
            sub_labels = sub_labels[next_comma:]
            # Check for missing commas
            if sub_labels and next_comma == 0:
                raise ValueError
        return labels
    except ValueError:
        raise ValueError("Invalid labels: " + text)
def _parse_sample(text):
separator = " # "
# Detect the labels in the text
label_start = _next_unquoted_char(text, '{')
label_start = text.find("{")
if label_start == -1 or separator in text[:label_start]:
# We don't have labels, but there could be an exemplar.
name_end = _next_unquoted_char(text, ' ')
name_end = text.index(" ")
name = text[:name_end]
if not _is_valid_legacy_metric_name(name):
raise ValueError("invalid metric name:" + text)
# Parse the remaining text after the name
remaining_text = text[name_end + 1:]
value, timestamp, exemplar = _parse_remaining_text(remaining_text)
return Sample(name, {}, value, timestamp, exemplar)
# The name is before the labels
name = text[:label_start]
label_end = _next_unquoted_char(text, '}')
labels = parse_labels(text[label_start + 1:label_end], True)
if not name:
# Name might be in the labels
if '__name__' not in labels:
raise ValueError
name = labels['__name__']
del labels['__name__']
elif '__name__' in labels:
raise ValueError("metric name specified more than once")
if separator not in text:
# Line doesn't contain an exemplar
# We can use `rindex` to find `label_end`
label_end = text.rindex("}")
label = text[label_start + 1:label_end]
labels = _parse_labels(label)
else:
# Line potentially contains an exemplar
# Fallback to parsing labels with a state machine
labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:])
label_end = labels_len + len(name)
# Parsing labels succeeded, continue parsing the remaining text
remaining_text = text[label_end + 2:]
value, timestamp, exemplar = _parse_remaining_text(remaining_text)
@@ -155,12 +294,7 @@ def _parse_remaining_text(text):
text = split_text[1]
it = iter(text)
in_quotes = False
for char in it:
if char == '"':
in_quotes = not in_quotes
if in_quotes:
continue
if state == 'timestamp':
if char == '#' and not timestamp:
state = 'exemplarspace'
@@ -180,9 +314,8 @@ def _parse_remaining_text(text):
raise ValueError("Invalid line: " + text)
elif state == 'exemplarstartoflabels':
if char == '{':
label_start = _next_unquoted_char(text, '{')
label_end = _last_unquoted_char(text, '}')
exemplar_labels = parse_labels(text[label_start + 1:label_end], True)
label_start, label_end = text.index("{"), text.rindex("}")
exemplar_labels = _parse_labels(text[label_start + 1:label_end])
state = 'exemplarparsedlabels'
else:
raise ValueError("Invalid line: " + text)
@@ -231,154 +364,6 @@ def _parse_remaining_text(text):
return val, ts, exemplar
def _parse_nh_sample(text, suffixes):
    """Determines if the line has a native histogram sample, and parses it if so.

    Returns a Sample whose native_histogram field is populated (value,
    timestamp and exemplar are None), or None when the line is not a
    native histogram sample.  Raises ValueError on malformed input, e.g.
    an unclosed brace or a sample name carrying one of *suffixes*.
    """
    labels_start = _next_unquoted_char(text, '{')
    labels_end = -1

    # Finding a native histogram sample requires careful parsing of
    # possibly-quoted text, which can appear in metric names, label names, and
    # values.
    #
    # First, we need to determine if there are metric labels. Find the space
    # between the metric definition and the rest of the line. Look for unquoted
    # space or {.
    i = 0
    has_metric_labels = False
    i = _next_unquoted_char(text, ' {')
    if i == -1:
        return

    # If the first unquoted char was a {, then that is the metric labels (which
    # could contain a UTF-8 metric name).
    if text[i] == '{':
        has_metric_labels = True
        # Consume the labels -- jump ahead to the close bracket.
        labels_end = i = _next_unquoted_char(text, '}', i)
        if labels_end == -1:
            raise ValueError

    # If there is no subsequent unquoted {, then it's definitely not a nh.
    nh_value_start = _next_unquoted_char(text, '{', i + 1)
    if nh_value_start == -1:
        return

    # Edge case: if there is an unquoted # between the metric definition and the {,
    # then this is actually an exemplar
    exemplar = _next_unquoted_char(text, '#', i + 1)
    if exemplar != -1 and exemplar < nh_value_start:
        return

    nh_value_end = _next_unquoted_char(text, '}', nh_value_start)
    if nh_value_end == -1:
        raise ValueError

    if has_metric_labels:
        labelstext = text[labels_start + 1:labels_end]
        labels = parse_labels(labelstext, True)
        name_end = labels_start
        name = text[:name_end]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
        if not name:
            # Name might be in the labels
            if '__name__' not in labels:
                raise ValueError
            name = labels['__name__']
            del labels['__name__']
        # Edge case: the only "label" is the name definition.
        if not labels:
            labels = None

        nh_value = text[nh_value_start:]
        nat_hist_value = _parse_nh_struct(nh_value)
        return Sample(name, labels, None, None, None, nat_hist_value)
    # check if it's a native histogram
    else:
        nh_value = text[nh_value_start:]
        name_end = nh_value_start - 1
        name = text[:name_end]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram should have no suffixes", name)
        # Not possible for UTF-8 name here, that would have been caught as having a labelset.
        nat_hist_value = _parse_nh_struct(nh_value)
        return Sample(name, None, None, None, None, nat_hist_value)
def _parse_nh_struct(text):
    """Parse the brace-delimited struct of a native histogram sample.

    ``text`` looks like
    ``{count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,
    positive_spans:[0:2,1:2],positive_deltas:[2,1,-2,3]}``.

    Returns a NativeHistogram.  Raises KeyError when one of the required
    scalar fields (count, sum, schema, zero_threshold, zero_count) is
    missing; span/delta lists are optional and default to None.
    """
    pattern = r'(\w+):\s*([^,}]+)'
    re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+(,\d+:\d+)*)\]')
    re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')
    items = dict(re.findall(pattern, text))
    span_matches = re_spans.findall(text)
    deltas = dict(re_deltas.findall(text))

    count_value = int(items['count'])
    # The sum may be non-integral (e.g. "sum:20.5"); int() would raise here.
    sum_value = float(items['sum'])
    schema = int(items['schema'])
    zero_threshold = float(items['zero_threshold'])
    zero_count = int(items['zero_count'])

    pos_spans = _compose_spans(span_matches, 'positive_spans')
    neg_spans = _compose_spans(span_matches, 'negative_spans')
    pos_deltas = _compose_deltas(deltas, 'positive_deltas')
    neg_deltas = _compose_deltas(deltas, 'negative_deltas')

    return NativeHistogram(
        count_value=count_value,
        sum_value=sum_value,
        schema=schema,
        zero_threshold=zero_threshold,
        zero_count=zero_count,
        pos_spans=pos_spans,
        neg_spans=neg_spans,
        pos_deltas=pos_deltas,
        neg_deltas=neg_deltas
    )
def _compose_spans(span_matches, spans_name):
    """Build the tuple of BucketSpan objects named *spans_name*.

    *span_matches* is a list of regex match tuples whose first element is
    the span-list name and whose second is the raw "start:end,start:end"
    text.  Returns None when *spans_name* is not among the matches.
    """
    parsed = {}
    for match in span_matches:
        # match[0] is the list name, match[1] the comma-separated pairs;
        # each "start:end" pair becomes a tuple of two ints.
        parsed[match[0]] = [
            tuple(int(part) for part in pair.split(':'))
            for pair in match[1].split(',')
        ]
    if spans_name not in parsed:
        return None
    return tuple(BucketSpan(start, end) for start, end in parsed[spans_name])
def _compose_deltas(deltas, deltas_name):
    """Return the tuple of int deltas named *deltas_name* from *deltas*.

    *deltas* maps list names to comma-separated integer strings.  Returns
    None when the name is absent or its value is empty/whitespace.
    """
    raw = deltas.get(deltas_name)
    if raw is None or not raw.strip():
        return None
    return tuple(int(piece.strip()) for piece in raw.split(','))
def _group_for_sample(sample, name, typ):
if typ == 'info':
# We can't distinguish between groups for info metrics.
@@ -421,8 +406,6 @@ def _check_histogram(samples, name):
for s in samples:
suffix = s.name[len(name):]
g = _group_for_sample(s, name, 'histogram')
if len(suffix) == 0:
continue
if g != group or s.timestamp != timestamp:
if group is not None:
do_checks()
@@ -498,14 +481,11 @@ def text_fd_to_metric_families(fd):
raise ValueError("Units not allowed for this metric type: " + name)
if typ in ['histogram', 'gaugehistogram']:
_check_histogram(samples, name)
_validate_metric_name(name)
metric = Metric(name, documentation, typ, unit)
# TODO: check labelvalues are valid utf8
metric.samples = samples
return metric
is_nh = False
typ = None
for line in fd:
if line[-1] == '\n':
line = line[:-1]
@@ -519,19 +499,16 @@ def text_fd_to_metric_families(fd):
if line == '# EOF':
eof = True
elif line.startswith('#'):
parts = _split_quoted(line, ' ', 3)
parts = line.split(' ', 3)
if len(parts) < 4:
raise ValueError("Invalid line: " + line)
candidate_name, quoted = _unquote_unescape(parts[2])
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
if candidate_name == name and samples:
if parts[2] == name and samples:
raise ValueError("Received metadata after samples: " + line)
if candidate_name != name:
if parts[2] != name:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# New metric
name = candidate_name
name = parts[2]
unit = None
typ = None
documentation = None
@@ -540,8 +517,8 @@ def text_fd_to_metric_families(fd):
group_timestamp = None
group_timestamp_samples = set()
samples = []
allowed_names = [candidate_name]
allowed_names = [parts[2]]
if parts[1] == 'HELP':
if documentation is not None:
raise ValueError("More than one HELP for metric: " + line)
@@ -560,25 +537,12 @@ def text_fd_to_metric_families(fd):
else:
raise ValueError("Invalid line: " + line)
else:
if typ == 'histogram':
# set to true to account for native histograms naming exceptions/sanitizing differences
is_nh = True
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
# It's not a native histogram
if sample is None:
is_nh = False
sample = _parse_sample(line)
else:
is_nh = False
sample = _parse_sample(line)
if sample.name not in allowed_names and not is_nh:
sample = _parse_sample(line)
if sample.name not in allowed_names:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# Start an unknown metric.
candidate_name, quoted = _unquote_unescape(sample.name)
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
name = candidate_name
name = sample.name
documentation = None
unit = None
typ = 'unknown'
@@ -606,29 +570,26 @@ def text_fd_to_metric_families(fd):
or _isUncanonicalNumber(sample.labels['quantile']))):
raise ValueError("Invalid quantile label: " + line)
if not is_nh:
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
else:
group_timestamp_samples = set()
series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)
group = g
group_timestamp = sample.timestamp
seen_groups.add(g)
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
else:
group_timestamp_samples = set()
series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)
group = g
group_timestamp = sample.timestamp
seen_groups.add(g)
if typ == 'stateset' and sample.value not in [0, 1]:
raise ValueError("Stateset samples can only have values zero and one: " + line)
@@ -645,7 +606,7 @@ def text_fd_to_metric_families(fd):
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
or (typ in ['counter'] and sample.name.endswith('_total'))):
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)

View File

@@ -1,13 +1,9 @@
import io as StringIO
import re
import string
from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple
from .metrics_core import Metric
from .samples import Sample
from .validation import (
_is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
)
def text_string_to_metric_families(text: str) -> Iterable[Metric]:
@@ -49,172 +45,54 @@ def _is_character_escaped(s: str, charpos: int) -> bool:
return num_bslashes % 2 == 1
def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
def _parse_labels(labels_string: str) -> Dict[str, str]:
labels: Dict[str, str] = {}
# Return if we don't have valid labels
if "=" not in labels_string:
return labels
escaping = False
if "\\" in labels_string:
escaping = True
# Copy original labels
sub_labels = labels_string.strip()
if openmetrics and sub_labels and sub_labels[0] == ',':
raise ValueError("leading comma: " + labels_string)
sub_labels = labels_string
try:
# Process one label at a time
while sub_labels:
# The label name is before the equal, or if there's no equal, that's the
# metric name.
name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
if not value_term:
if openmetrics:
raise ValueError("empty term in line: " + labels_string)
continue
label_name, quoted_name = _unquote_unescape(name_term)
if not quoted_name and not _is_valid_legacy_metric_name(label_name):
raise ValueError("unquoted UTF-8 metric name")
# Check for missing quotes
if not value_term or value_term[0] != '"':
raise ValueError
# The label name is before the equal
value_start = sub_labels.index("=")
label_name = sub_labels[:value_start]
sub_labels = sub_labels[value_start + 1:].lstrip()
# Find the first quote after the equal
quote_start = sub_labels.index('"') + 1
value_substr = sub_labels[quote_start:]
# The first quote is guaranteed to be after the equal.
# Make sure that the next unescaped quote is the last character.
i = 1
while i < len(value_term):
i = value_term.index('"', i)
if not _is_character_escaped(value_term[:i], i):
# Find the last unescaped quote
i = 0
while i < len(value_substr):
i = value_substr.index('"', i)
if not _is_character_escaped(value_substr, i):
break
i += 1
# The label value is between the first and last quote
quote_end = i + 1
if quote_end != len(value_term):
raise ValueError("unexpected text after quote: " + labels_string)
label_value = sub_labels[quote_start:quote_end]
# Replace escaping if needed
if escaping:
label_value = _replace_escaping(label_value)
labels[label_name.strip()] = label_value
# Remove the processed label from the sub-slice for next iteration
sub_labels = sub_labels[quote_end + 1:]
next_comma = sub_labels.find(",") + 1
sub_labels = sub_labels[next_comma:].lstrip()
label_value, _ = _unquote_unescape(value_term)
if label_name == '__name__':
_validate_metric_name(label_name)
else:
_validate_labelname(label_name)
if label_name in labels:
raise ValueError("invalid line, duplicate label name: " + labels_string)
labels[label_name] = label_value
return labels
except ValueError:
raise ValueError("Invalid labels: " + labels_string)
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
"""Extract the next comma-separated label term from the text. The results
are stripped terms for the label name, label value, and then the remainder
of the string including the final , or }.
Raises ValueError if the term is empty and we're in openmetrics mode.
"""
# There may be a leading comma, which is fine here.
if text[0] == ',':
text = text[1:]
if not text:
return "", "", ""
if text[0] == ',':
raise ValueError("multiple commas")
splitpos = _next_unquoted_char(text, '=,}')
if splitpos >= 0 and text[splitpos] == "=":
labelname = text[:splitpos]
text = text[splitpos + 1:]
splitpos = _next_unquoted_char(text, ',}')
else:
labelname = "__name__"
if splitpos == -1:
splitpos = len(text)
term = text[:splitpos]
if not term and openmetrics:
raise ValueError("empty term:", term)
rest = text[splitpos:]
return labelname, term.strip(), rest.strip()
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
"""Return position of next unquoted character in tuple, or -1 if not found.
It is always assumed that the first character being checked is not already
inside quotes.
"""
in_quotes = False
if chs is None:
chs = string.whitespace
for i, c in enumerate(text[startidx:]):
if c == '"' and not _is_character_escaped(text, startidx + i):
in_quotes = not in_quotes
if not in_quotes:
if c in chs:
return startidx + i
return -1
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
"""Return position of last unquoted character in list, or -1 if not found."""
i = len(text) - 1
in_quotes = False
if chs is None:
chs = string.whitespace
while i > 0:
if text[i] == '"' and not _is_character_escaped(text, i):
in_quotes = not in_quotes
if not in_quotes:
if text[i] in chs:
return i
i -= 1
return -1
def _split_quoted(text, separator, maxsplit=0):
"""Splits on split_ch similarly to strings.split, skipping separators if
they are inside quotes.
"""
tokens = ['']
x = 0
while x < len(text):
split_pos = _next_unquoted_char(text, separator, x)
if split_pos == -1:
tokens[-1] = text[x:]
x = len(text)
continue
# If the first character is the separator keep going. This happens when
# there are double whitespace characters separating symbols.
if split_pos == x:
x += 1
continue
if maxsplit > 0 and len(tokens) > maxsplit:
tokens[-1] = text[x:]
break
tokens[-1] = text[x:split_pos]
x = split_pos + 1
tokens.append('')
return tokens
def _unquote_unescape(text):
"""Returns the string, and true if it was quoted."""
if not text:
return text, False
quoted = False
text = text.strip()
if text[0] == '"':
if len(text) == 1 or text[-1] != '"':
raise ValueError("missing close quote")
text = text[1:-1]
quoted = True
if "\\" in text:
text = _replace_escaping(text)
return text, quoted
raise ValueError("Invalid labels: %s" % labels_string)
# If we have multiple values only consider the first
@@ -226,50 +104,34 @@ def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
values = [value.strip() for value in s.split(separator) if value.strip()]
if not values:
return float(s), None
value = _parse_value(values[0])
timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
value = float(values[0])
timestamp = (float(values[-1]) / 1000) if len(values) > 1 else None
return value, timestamp
def _parse_value(value):
value = ''.join(value)
if value != value.strip() or '_' in value:
raise ValueError(f"Invalid value: {value!r}")
try:
return int(value)
except ValueError:
return float(value)
def _parse_sample(text):
separator = " # "
def _parse_sample(text: str) -> Sample:
# Detect the labels in the text
label_start = _next_unquoted_char(text, '{')
if label_start == -1 or separator in text[:label_start]:
# We don't have labels, but there could be an exemplar.
name_end = _next_unquoted_char(text, ' \t')
name = text[:name_end].strip()
if not _is_valid_legacy_metric_name(name):
raise ValueError("invalid metric name:" + text)
# Parse the remaining text after the name
remaining_text = text[name_end + 1:]
value, timestamp = _parse_value_and_timestamp(remaining_text)
try:
label_start, label_end = text.index("{"), text.rindex("}")
# The name is before the labels
name = text[:label_start].strip()
# We ignore the starting curly brace
label = text[label_start + 1:label_end]
# The value is after the label end (ignoring curly brace)
value, timestamp = _parse_value_and_timestamp(text[label_end + 1:])
return Sample(name, _parse_labels(label), value, timestamp)
# We don't have labels
except ValueError:
# Detect what separator is used
separator = " "
if separator not in text:
separator = "\t"
name_end = text.index(separator)
name = text[:name_end]
# The value is after the name
value, timestamp = _parse_value_and_timestamp(text[name_end:])
return Sample(name, {}, value, timestamp)
name = text[:label_start].strip()
label_end = _next_unquoted_char(text[label_start:], '}') + label_start
labels = parse_labels(text[label_start + 1:label_end], False)
if not name:
# Name might be in the labels
if '__name__' not in labels:
raise ValueError
name = labels['__name__']
del labels['__name__']
elif '__name__' in labels:
raise ValueError("metric name specified more than once")
# Parsing labels succeeded, continue parsing the remaining text
remaining_text = text[label_end + 1:]
value, timestamp = _parse_value_and_timestamp(remaining_text)
return Sample(name, labels, value, timestamp)
def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
@@ -306,38 +168,28 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
line = line.strip()
if line.startswith('#'):
parts = _split_quoted(line, None, 3)
parts = line.split(None, 3)
if len(parts) < 2:
continue
candidate_name, quoted = '', False
if len(parts) > 2:
# Ignore comment tokens
if parts[1] != 'TYPE' and parts[1] != 'HELP':
continue
candidate_name, quoted = _unquote_unescape(parts[2])
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
if parts[1] == 'HELP':
if candidate_name != name:
if parts[2] != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = candidate_name
name = parts[2]
typ = 'untyped'
samples = []
allowed_names = [candidate_name]
allowed_names = [parts[2]]
if len(parts) == 4:
documentation = _replace_help_escaping(parts[3])
else:
documentation = ''
elif parts[1] == 'TYPE':
if len(parts) < 4:
raise ValueError
if candidate_name != name:
if parts[2] != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = candidate_name
name = parts[2]
documentation = ''
samples = []
typ = parts[3]
@@ -348,6 +200,9 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
'histogram': ['_count', '_sum', '_bucket'],
}.get(typ, [''])
allowed_names = [name + n for n in allowed_names]
else:
# Ignore other comment tokens
pass
elif line == '':
# Ignore blank lines
pass

View File

@@ -103,7 +103,7 @@ class CollectorRegistry(Collector):
only samples with the given names.
Intended usage is:
generate_latest(REGISTRY.restricted_registry(['a_timeseries']), escaping)
generate_latest(REGISTRY.restricted_registry(['a_timeseries']))
Experimental."""
names = set(names)

View File

@@ -1,4 +1,4 @@
from typing import Dict, NamedTuple, Optional, Sequence, Union
from typing import Dict, NamedTuple, Optional, Union
class Timestamp:
@@ -28,16 +28,7 @@ class Timestamp:
return not self == other
def __gt__(self, other: "Timestamp") -> bool:
return self.nsec > other.nsec if self.sec == other.sec else self.sec > other.sec
def __lt__(self, other: "Timestamp") -> bool:
return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec
# BucketSpan is experimental and subject to change at any time.
class BucketSpan(NamedTuple):
offset: int
length: int
return self.sec > other.sec or self.nsec > other.nsec
# Timestamp and exemplar are optional.
@@ -51,24 +42,9 @@ class Exemplar(NamedTuple):
timestamp: Optional[Union[float, Timestamp]] = None
# NativeHistogram is experimental and subject to change at any time.
class NativeHistogram(NamedTuple):
count_value: float
sum_value: float
schema: int
zero_threshold: float
zero_count: float
pos_spans: Optional[Sequence[BucketSpan]] = None
neg_spans: Optional[Sequence[BucketSpan]] = None
pos_deltas: Optional[Sequence[int]] = None
neg_deltas: Optional[Sequence[int]] = None
nh_exemplars: Optional[Sequence[Exemplar]] = None
class Sample(NamedTuple):
name: str
labels: Dict[str, str]
value: float
timestamp: Optional[Union[float, Timestamp]] = None
exemplar: Optional[Exemplar] = None
native_histogram: Optional[NativeHistogram] = None

View File

@@ -1,5 +1,4 @@
import math
from typing import Union
INF = float("inf")
MINUS_INF = float("-inf")
@@ -23,14 +22,3 @@ def floatToGoString(d):
mantissa = f'{s[0]}.{s[1:dot]}{s[dot + 1:]}'.rstrip('0.')
return f'{mantissa}e+0{dot - 1}'
return s
def parse_version(version_str: str) -> tuple[Union[int, str], ...]:
    """Split a dotted version string into a tuple of components.

    Each dot-separated part becomes an int when purely numeric, otherwise
    it is kept as the original string (e.g. "1.2.3rc1" -> (1, 2, "3rc1")).
    """
    def _coerce(part: str) -> Union[int, str]:
        try:
            return int(part)
        except ValueError:
            return part

    return tuple(_coerce(part) for part in version_str.split('.'))

View File

@@ -1,124 +0,0 @@
import os
import re
METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$')
METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$')
def _init_legacy_validation() -> bool:
    """Read the legacy-validation flag from the environment.

    Returns True when PROMETHEUS_LEGACY_NAME_VALIDATION is set to a truthy
    string ('true', '1', 't', case-insensitive); False otherwise.
    """
    raw = os.environ.get("PROMETHEUS_LEGACY_NAME_VALIDATION", 'False')
    return raw.lower() in ('true', '1', 't')
_legacy_validation = _init_legacy_validation()
def get_legacy_validation() -> bool:
    """Return the current status of the legacy validation setting.

    True restricts metric and label names to the classic charset
    (METRIC_NAME_RE / METRIC_LABEL_NAME_RE); False allows UTF-8 names.
    """
    return _legacy_validation
def disable_legacy_validation():
    """Disable legacy name validation, instead allowing all UTF8 characters.

    Mutates the module-global ``_legacy_validation`` flag read by the
    ``_validate_*`` helpers.
    """
    global _legacy_validation
    _legacy_validation = False
def enable_legacy_validation():
    """Enable legacy name validation instead of allowing all UTF8 characters.

    Mutates the module-global ``_legacy_validation`` flag read by the
    ``_validate_*`` helpers.
    """
    global _legacy_validation
    _legacy_validation = True
def _validate_metric_name(name: str) -> None:
    """Raises ValueError if the provided name is not a valid metric name.

    This check uses the global legacy validation setting to determine the
    validation scheme: legacy mode enforces METRIC_NAME_RE, otherwise the
    name only has to be encodable as UTF-8.
    """
    if not name:
        raise ValueError("metric name cannot be empty")
    if _legacy_validation:
        if not METRIC_NAME_RE.match(name):
            raise ValueError("invalid metric name " + name)
    try:
        name.encode('utf-8')
    except UnicodeError:
        # str.encode raises UnicodeEncodeError (a UnicodeError subclass) for
        # unencodable code points such as lone surrogates; the previous
        # `except UnicodeDecodeError` clause could never fire.
        raise ValueError("invalid metric name " + name)
def _is_valid_legacy_metric_name(name: str) -> bool:
    """Return True if *name* conforms to the legacy validation scheme."""
    # An empty string short-circuits; otherwise defer to the legacy regex.
    return bool(name) and METRIC_NAME_RE.match(name) is not None
def _validate_metric_label_name_token(tok: str) -> None:
    """Raise ValueError if a parsed label name token is invalid.

    UTF-8 (non-legacy) names must be quoted; unquoted tokens — and all
    tokens in legacy mode — must match METRIC_LABEL_NAME_RE.

    Raises:
        ValueError: if the token is empty or fails the applicable check.
    """
    if not tok:
        raise ValueError("invalid label name token " + tok)
    quoted = tok[0] == '"' and tok[-1] == '"'
    if not quoted or get_legacy_validation():
        if not METRIC_LABEL_NAME_RE.match(tok):
            raise ValueError("invalid label name token " + tok)
        return
    try:
        tok.encode('utf-8')
    except UnicodeError:
        # Fix: encode() raises UnicodeEncodeError, not UnicodeDecodeError as
        # previously caught, so bad tokens escaped with the wrong exception.
        raise ValueError("invalid label name token " + tok)
def _validate_labelname(l):
    """Raise ValueError if *l* is not a valid, non-reserved label name.

    Uses the global legacy validation setting to determine the scheme:
    legacy mode requires METRIC_LABEL_NAME_RE; otherwise any UTF-8-encodable
    name is allowed. Names matching the reserved "__" prefix are always
    rejected.

    Raises:
        ValueError: for an invalid or reserved label name.
    """
    if get_legacy_validation():
        if not METRIC_LABEL_NAME_RE.match(l):
            raise ValueError('Invalid label metric name: ' + l)
        if RESERVED_METRIC_LABEL_NAME_RE.match(l):
            raise ValueError('Reserved label metric name: ' + l)
    else:
        try:
            l.encode('utf-8')
        except UnicodeError:
            # Fix: encode() raises UnicodeEncodeError; the previous
            # except UnicodeDecodeError could never fire, so invalid names
            # raised the wrong exception type instead of ValueError.
            raise ValueError('Invalid label metric name: ' + l)
        if RESERVED_METRIC_LABEL_NAME_RE.match(l):
            raise ValueError('Reserved label metric name: ' + l)
def _is_valid_legacy_labelname(l: str) -> bool:
    """Return True if *l* is a valid, non-reserved legacy label name."""
    return (
        bool(l)
        and METRIC_LABEL_NAME_RE.match(l) is not None
        and RESERVED_METRIC_LABEL_NAME_RE.match(l) is None
    )
def _validate_labelnames(cls, labelnames):
    """Validate each label name and return them as a tuple.

    Uses the global legacy validation setting for the per-name checks, and
    additionally rejects any name listed in cls._reserved_labelnames.

    Raises:
        ValueError: for an invalid or reserved label name.
    """
    labelnames = tuple(labelnames)
    for l in labelnames:
        _validate_labelname(l)
        if l in cls._reserved_labelnames:
            # Fix: message was garbled as 'Reserved label methe fric name: '.
            raise ValueError('Reserved label metric name: ' + l)
    return labelnames
def _validate_exemplar(exemplar):
    """Raise ValueError if the exemplar's labels are invalid or too long.

    Validates each label name and enforces the OpenMetrics cap of 128 UTF-8
    code points for the combined length of all label names and values.

    Raises:
        ValueError: for an invalid label name or an over-long label set.
    """
    runes = 0
    for k, v in exemplar.items():
        _validate_labelname(k)
        runes += len(k)
        runes += len(v)
    if runes > 128:
        # Fix: the '%d' placeholder was never interpolated with the count.
        raise ValueError(
            'Exemplar labels have %d UTF-8 characters, exceeding the limit of 128' % runes)