API refactor
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-10-07 16:25:52 +09:00
parent 76d0d86211
commit 91c7e04474
1171 changed files with 81940 additions and 44117 deletions

View File

@@ -1,9 +1,13 @@
import io as StringIO
import re
import string
from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple
from .metrics_core import Metric
from .samples import Sample
from .validation import (
_is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
)
def text_string_to_metric_families(text: str) -> Iterable[Metric]:
@@ -45,54 +49,172 @@ def _is_character_escaped(s: str, charpos: int) -> bool:
return num_bslashes % 2 == 1
def _parse_labels(labels_string: str) -> Dict[str, str]:
def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
labels: Dict[str, str] = {}
# Return if we don't have valid labels
if "=" not in labels_string:
return labels
escaping = False
if "\\" in labels_string:
escaping = True
# Copy original labels
sub_labels = labels_string
sub_labels = labels_string.strip()
if openmetrics and sub_labels and sub_labels[0] == ',':
raise ValueError("leading comma: " + labels_string)
try:
# Process one label at a time
while sub_labels:
# The label name is before the equal
value_start = sub_labels.index("=")
label_name = sub_labels[:value_start]
sub_labels = sub_labels[value_start + 1:].lstrip()
# Find the first quote after the equal
quote_start = sub_labels.index('"') + 1
value_substr = sub_labels[quote_start:]
# The label name is before the equal, or if there's no equal, that's the
# metric name.
name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
if not value_term:
if openmetrics:
raise ValueError("empty term in line: " + labels_string)
continue
label_name, quoted_name = _unquote_unescape(name_term)
if not quoted_name and not _is_valid_legacy_metric_name(label_name):
raise ValueError("unquoted UTF-8 metric name")
# Check for missing quotes
if not value_term or value_term[0] != '"':
raise ValueError
# Find the last unescaped quote
i = 0
while i < len(value_substr):
i = value_substr.index('"', i)
if not _is_character_escaped(value_substr, i):
# The first quote is guaranteed to be after the equal.
# Make sure that the next unescaped quote is the last character.
i = 1
while i < len(value_term):
i = value_term.index('"', i)
if not _is_character_escaped(value_term[:i], i):
break
i += 1
# The label value is between the first and last quote
quote_end = i + 1
label_value = sub_labels[quote_start:quote_end]
# Replace escaping if needed
if escaping:
label_value = _replace_escaping(label_value)
labels[label_name.strip()] = label_value
# Remove the processed label from the sub-slice for next iteration
sub_labels = sub_labels[quote_end + 1:]
next_comma = sub_labels.find(",") + 1
sub_labels = sub_labels[next_comma:].lstrip()
if quote_end != len(value_term):
raise ValueError("unexpected text after quote: " + labels_string)
label_value, _ = _unquote_unescape(value_term)
if label_name == '__name__':
_validate_metric_name(label_name)
else:
_validate_labelname(label_name)
if label_name in labels:
raise ValueError("invalid line, duplicate label name: " + labels_string)
labels[label_name] = label_value
return labels
except ValueError:
raise ValueError("Invalid labels: %s" % labels_string)
raise ValueError("Invalid labels: " + labels_string)
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
    """Split the first label term off the front of ``text``.

    ``text`` must be non-empty. One leading comma is tolerated and dropped.

    Returns a ``(labelname, term, rest)`` tuple:

    * ``labelname`` is everything before the first unquoted ``=``, exactly as
      sliced (it is NOT stripped here). If the term has no unquoted ``=``
      before the next unquoted ``,`` or ``}``, it is ``"__name__"``.
    * ``term`` is the stripped text up to the next unquoted ``,`` or ``}``
      (or the end of the string).
    * ``rest`` is the stripped remainder, starting at that ``,`` or ``}``.

    A text consisting of only a comma yields ``("", "", "")``.

    Raises ValueError on two consecutive leading commas, or when the term is
    empty and ``openmetrics`` is true.
    """
    # A single leading comma just separates this term from the previous one.
    if text[0] == ',':
        text = text[1:]
        if not text:
            return "", "", ""
        if text[0] == ',':
            raise ValueError("multiple commas")

    # Until an unquoted '=' proves otherwise, the term is a bare metric name.
    labelname = "__name__"
    splitpos = _next_unquoted_char(text, '=,}')
    if splitpos >= 0 and text[splitpos] == "=":
        labelname, text = text[:splitpos], text[splitpos + 1:]
        splitpos = _next_unquoted_char(text, ',}')

    # No terminator found: the term runs to the end of the string.
    if splitpos == -1:
        splitpos = len(text)

    term, rest = text[:splitpos], text[splitpos:]
    if openmetrics and not term:
        raise ValueError("empty term:", term)
    return labelname, term.strip(), rest.strip()
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
    """Find the first character from ``chs`` that lies outside double quotes.

    The scan starts at ``startidx`` and assumes that position is not already
    inside a quoted section. Every double quote that
    ``_is_character_escaped`` does not report as escaped toggles the
    "inside quotes" state.

    ``chs`` is the set of characters to look for, given as a string; ``None``
    means any character in ``string.whitespace``.

    Returns the index into ``text`` of the match, or -1 if there is none.
    """
    wanted = string.whitespace if chs is None else chs
    inside_quotes = False
    # enumerate(..., startidx) yields positions relative to the full string.
    for pos, ch in enumerate(text[startidx:], startidx):
        if ch == '"' and not _is_character_escaped(text, pos):
            inside_quotes = not inside_quotes
        if not inside_quotes and ch in wanted:
            return pos
    return -1
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
"""Return position of last unquoted character in list, or -1 if not found."""
i = len(text) - 1
in_quotes = False
if chs is None:
chs = string.whitespace
while i > 0:
if text[i] == '"' and not _is_character_escaped(text, i):
in_quotes = not in_quotes
if not in_quotes:
if text[i] in chs:
return i
i -= 1
return -1
def _split_quoted(text, separator, maxsplit=0):
    """Split ``text`` on ``separator`` characters that are outside quotes.

    ``separator`` is passed to ``_next_unquoted_char`` as the set of
    characters to split on (``None`` means whitespace). Consecutive
    separators are collapsed rather than producing empty tokens between
    them.

    When ``maxsplit`` is positive, at most ``maxsplit`` splits are made and
    the final token holds the unsplit remainder.

    Returns a list of tokens. It always has at least one element, and the
    last one is ``''`` when the text is empty or ends right after a split.
    """
    tokens = ['']
    cursor = 0
    end = len(text)
    while cursor < end:
        split_pos = _next_unquoted_char(text, separator, cursor)
        if split_pos == cursor:
            # Separator directly at the cursor (e.g. doubled whitespace):
            # step over it without emitting a token.
            cursor += 1
        elif split_pos == -1 or (maxsplit > 0 and len(tokens) > maxsplit):
            # No more separators, or the split budget is used up: the rest
            # of the text is the final token.
            tokens[-1] = text[cursor:]
            break
        else:
            tokens[-1] = text[cursor:split_pos]
            tokens.append('')
            cursor = split_pos + 1
    return tokens
def _unquote_unescape(text):
"""Returns the string, and true if it was quoted."""
if not text:
return text, False
quoted = False
text = text.strip()
if text[0] == '"':
if len(text) == 1 or text[-1] != '"':
raise ValueError("missing close quote")
text = text[1:-1]
quoted = True
if "\\" in text:
text = _replace_escaping(text)
return text, quoted
# If we have multiple values only consider the first
@@ -104,34 +226,50 @@ def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
values = [value.strip() for value in s.split(separator) if value.strip()]
if not values:
return float(s), None
value = float(values[0])
timestamp = (float(values[-1]) / 1000) if len(values) > 1 else None
value = _parse_value(values[0])
timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
return value, timestamp
def _parse_sample(text: str) -> Sample:
# Detect the labels in the text
def _parse_value(value):
value = ''.join(value)
if value != value.strip() or '_' in value:
raise ValueError(f"Invalid value: {value!r}")
try:
label_start, label_end = text.index("{"), text.rindex("}")
# The name is before the labels
name = text[:label_start].strip()
# We ignore the starting curly brace
label = text[label_start + 1:label_end]
# The value is after the label end (ignoring curly brace)
value, timestamp = _parse_value_and_timestamp(text[label_end + 1:])
return Sample(name, _parse_labels(label), value, timestamp)
# We don't have labels
return int(value)
except ValueError:
# Detect what separator is used
separator = " "
if separator not in text:
separator = "\t"
name_end = text.index(separator)
name = text[:name_end]
# The value is after the name
value, timestamp = _parse_value_and_timestamp(text[name_end:])
return float(value)
def _parse_sample(text: str) -> Sample:
    """Parse one sample line into a ``Sample``.

    Two layouts are handled:

    * No label set (no unquoted ``{``, or ``" # "`` occurs before it): the
      name is the text up to the first unquoted space or tab and must be a
      valid legacy metric name.
    * With a label set: the name is the text before ``{``. If that is empty,
      the name is taken from the ``__name__`` entry of the parsed labels.

    In both layouts, whatever follows the name or the closing ``}`` is handed
    to ``_parse_value_and_timestamp``.

    Raises ValueError if the line has no value after the name, the label set
    is not closed, the metric name is missing, invalid or given twice, or the
    labels or value cannot be parsed.
    """
    separator = " # "
    # Detect the labels in the text
    label_start = _next_unquoted_char(text, '{')
    if label_start == -1 or separator in text[:label_start]:
        # We don't have labels, but there could be an exemplar.
        name_end = _next_unquoted_char(text, ' \t')
        if name_end == -1:
            # Without this check the -1 sentinel would be used as a slice
            # index: the name would lose its last character and the whole
            # line would be parsed as the value (e.g. "inf" -> name "in").
            raise ValueError("missing value in line: " + text)
        name = text[:name_end].strip()
        if not _is_valid_legacy_metric_name(name):
            raise ValueError("invalid metric name:" + text)
        # Parse the remaining text after the name
        remaining_text = text[name_end + 1:]
        value, timestamp = _parse_value_and_timestamp(remaining_text)
        return Sample(name, {}, value, timestamp)

    name = text[:label_start].strip()
    brace_offset = _next_unquoted_char(text[label_start:], '}')
    if brace_offset == -1:
        # Fail explicitly instead of slicing with a bogus end index.
        raise ValueError("missing closing brace in line: " + text)
    label_end = brace_offset + label_start
    labels = parse_labels(text[label_start + 1:label_end], False)
    if not name:
        # Name might be in the labels
        if '__name__' not in labels:
            raise ValueError
        name = labels['__name__']
        del labels['__name__']
    elif '__name__' in labels:
        raise ValueError("metric name specified more than once")
    # Parsing labels succeeded, continue parsing the remaining text
    remaining_text = text[label_end + 1:]
    value, timestamp = _parse_value_and_timestamp(remaining_text)
    return Sample(name, labels, value, timestamp)
def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
@@ -168,28 +306,38 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
line = line.strip()
if line.startswith('#'):
parts = line.split(None, 3)
parts = _split_quoted(line, None, 3)
if len(parts) < 2:
continue
candidate_name, quoted = '', False
if len(parts) > 2:
# Ignore comment tokens
if parts[1] != 'TYPE' and parts[1] != 'HELP':
continue
candidate_name, quoted = _unquote_unescape(parts[2])
if not quoted and not _is_valid_legacy_metric_name(candidate_name):
raise ValueError
if parts[1] == 'HELP':
if parts[2] != name:
if candidate_name != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = parts[2]
name = candidate_name
typ = 'untyped'
samples = []
allowed_names = [parts[2]]
allowed_names = [candidate_name]
if len(parts) == 4:
documentation = _replace_help_escaping(parts[3])
else:
documentation = ''
elif parts[1] == 'TYPE':
if parts[2] != name:
if len(parts) < 4:
raise ValueError
if candidate_name != name:
if name != '':
yield build_metric(name, documentation, typ, samples)
# New metric
name = parts[2]
name = candidate_name
documentation = ''
samples = []
typ = parts[3]
@@ -200,9 +348,6 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
'histogram': ['_count', '_sum', '_bucket'],
}.get(typ, [''])
allowed_names = [name + n for n in allowed_names]
else:
# Ignore other comment tokens
pass
elif line == '':
# Ignore blank lines
pass