main commit

2025-10-16 16:30:25 +09:00
parent 91c7e04474
commit 537e7b363f
1146 changed files with 45926 additions and 77196 deletions
--- a/venv/lib/python3.12/site-packages/prometheus_client/parser.py
+++ b/venv/lib/python3.12/site-packages/prometheus_client/parser.py
@@ -1,13 +1,9 @@
 import io as StringIO
 import re
-import string
 from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple

 from .metrics_core import Metric
 from .samples import Sample
-from .validation import (
-    _is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
-)


 def text_string_to_metric_families(text: str) -> Iterable[Metric]:
@@ -49,172 +45,54 @@ def _is_character_escaped(s: str, charpos: int) -> bool:
    return num_bslashes % 2 == 1


-def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
+def _parse_labels(labels_string: str) -> Dict[str, str]:
    labels: Dict[str, str] = {}
+    # Return if we don't have valid labels
+    if "=" not in labels_string:
+        return labels
+
+    escaping = False
+    if "\\" in labels_string:
+        escaping = True

    # Copy original labels
-    sub_labels = labels_string.strip()
-    if openmetrics and sub_labels and sub_labels[0] == ',':
-        raise ValueError("leading comma: " + labels_string)
+    sub_labels = labels_string
    try:
        # Process one label at a time
        while sub_labels:
-            # The label name is before the equal, or if there's no equal, that's the
-            # metric name.
-            
-            name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
-            if not value_term:
-                if openmetrics:
-                    raise ValueError("empty term in line: " + labels_string)
-                continue
-            
-            label_name, quoted_name = _unquote_unescape(name_term)
-                
-            if not quoted_name and not _is_valid_legacy_metric_name(label_name):
-                raise ValueError("unquoted UTF-8 metric name")
-                
-            # Check for missing quotes 
-            if not value_term or value_term[0] != '"':
-                raise ValueError
+            # The label name is before the equal
+            value_start = sub_labels.index("=")
+            label_name = sub_labels[:value_start]
+            sub_labels = sub_labels[value_start + 1:].lstrip()
+            # Find the first quote after the equal
+            quote_start = sub_labels.index('"') + 1
+            value_substr = sub_labels[quote_start:]

-            # The first quote is guaranteed to be after the equal.
-            # Make sure that the next unescaped quote is the last character.
-            i = 1
-            while i < len(value_term):
-                i = value_term.index('"', i)
-                if not _is_character_escaped(value_term[:i], i):
+            # Find the last unescaped quote
+            i = 0
+            while i < len(value_substr):
+                i = value_substr.index('"', i)
+                if not _is_character_escaped(value_substr, i):
                    break
                i += 1
+
            # The label value is between the first and last quote
            quote_end = i + 1
-            if quote_end != len(value_term):
-                raise ValueError("unexpected text after quote: " + labels_string)
+            label_value = sub_labels[quote_start:quote_end]
+            # Replace escaping if needed
+            if escaping:
+                label_value = _replace_escaping(label_value)
+            labels[label_name.strip()] = label_value
+
+            # Remove the processed label from the sub-slice for next iteration
+            sub_labels = sub_labels[quote_end + 1:]
+            next_comma = sub_labels.find(",") + 1
+            sub_labels = sub_labels[next_comma:].lstrip()

-            label_value, _ = _unquote_unescape(value_term)
-            if label_name == '__name__':
-                _validate_metric_name(label_name)
-            else:
-                _validate_labelname(label_name)
-            if label_name in labels:
-                raise ValueError("invalid line, duplicate label name: " + labels_string)
-            labels[label_name] = label_value
        return labels
+
    except ValueError:
-        raise ValueError("Invalid labels: " + labels_string)
-    
-
-def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
-    """Extract the next comma-separated label term from the text. The results
-    are stripped terms for the label name, label value, and then the remainder
-    of the string including the final , or }.
-    
-    Raises ValueError if the term is empty and we're in openmetrics mode.
-    """
-    
-    # There may be a leading comma, which is fine here.
-    if text[0] == ',':
-        text = text[1:]
-        if not text:
-            return "", "", ""
-        if text[0] == ',':
-            raise ValueError("multiple commas")
-
-    splitpos = _next_unquoted_char(text, '=,}')
-    if splitpos >= 0 and text[splitpos] == "=":
-        labelname = text[:splitpos]
-        text = text[splitpos + 1:]
-        splitpos = _next_unquoted_char(text, ',}')
-    else:
-        labelname = "__name__"
-
-    if splitpos == -1:
-        splitpos = len(text)
-    term = text[:splitpos]
-    if not term and openmetrics:
-        raise ValueError("empty term:", term)
-    
-    rest = text[splitpos:]
-    return labelname, term.strip(), rest.strip()
-
-
-def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
-    """Return position of next unquoted character in tuple, or -1 if not found.
-    
-    It is always assumed that the first character being checked is not already
-    inside quotes.
-    """
-    in_quotes = False
-    if chs is None:
-        chs = string.whitespace
-
-    for i, c in enumerate(text[startidx:]):
-        if c == '"' and not _is_character_escaped(text, startidx + i):
-            in_quotes = not in_quotes
-        if not in_quotes:
-            if c in chs:
-                return startidx + i
-    return -1
-
-
-def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
-    """Return position of last unquoted character in list, or -1 if not found."""
-    i = len(text) - 1
-    in_quotes = False
-    if chs is None:
-        chs = string.whitespace
-    while i > 0:
-        if text[i] == '"' and not _is_character_escaped(text, i):
-            in_quotes = not in_quotes
-            
-        if not in_quotes:
-            if text[i] in chs:
-                return i
-        i -= 1
-    return -1
-
-
-def _split_quoted(text, separator, maxsplit=0):
-    """Splits on split_ch similarly to strings.split, skipping separators if
-    they are inside quotes.
-    """
-
-    tokens = ['']
-    x = 0
-    while x < len(text):
-        split_pos = _next_unquoted_char(text, separator, x)
-        if split_pos == -1:
-            tokens[-1] = text[x:]
-            x = len(text)
-            continue
-        # If the first character is the separator keep going. This happens when
-        # there are double whitespace characters separating symbols.
-        if split_pos == x:
-            x += 1
-            continue
-
-        if maxsplit > 0 and len(tokens) > maxsplit:
-            tokens[-1] = text[x:]
-            break
-        tokens[-1] = text[x:split_pos]
-        x = split_pos + 1
-        tokens.append('')
-    return tokens
-
-
-def _unquote_unescape(text):
-    """Returns the string, and true if it was quoted."""
-    if not text:
-        return text, False
-    quoted = False
-    text = text.strip()
-    if text[0] == '"':
-        if len(text) == 1 or text[-1] != '"':
-            raise ValueError("missing close quote")
-        text = text[1:-1]
-        quoted = True
-    if "\\" in text:
-        text = _replace_escaping(text)
-    return text, quoted
+        raise ValueError("Invalid labels: %s" % labels_string)


 # If we have multiple values only consider the first
@@ -226,50 +104,34 @@ def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
    values = [value.strip() for value in s.split(separator) if value.strip()]
    if not values:
        return float(s), None
-    value = _parse_value(values[0])
-    timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
+    value = float(values[0])
+    timestamp = (float(values[-1]) / 1000) if len(values) > 1 else None
    return value, timestamp


-def _parse_value(value):
-    value = ''.join(value)
-    if value != value.strip() or '_' in value:
-        raise ValueError(f"Invalid value: {value!r}")
-    try:
-        return int(value)
-    except ValueError:
-        return float(value)
-    
-
-def _parse_sample(text):
-    separator = " # "
+def _parse_sample(text: str) -> Sample:
    # Detect the labels in the text
-    label_start = _next_unquoted_char(text, '{')
-    if label_start == -1 or separator in text[:label_start]:
-        # We don't have labels, but there could be an exemplar.
-        name_end = _next_unquoted_char(text, ' \t')
-        name = text[:name_end].strip()
-        if not _is_valid_legacy_metric_name(name):
-            raise ValueError("invalid metric name:" + text)
-        # Parse the remaining text after the name
-        remaining_text = text[name_end + 1:]
-        value, timestamp = _parse_value_and_timestamp(remaining_text)
+    try:
+        label_start, label_end = text.index("{"), text.rindex("}")
+        # The name is before the labels
+        name = text[:label_start].strip()
+        # We ignore the starting curly brace
+        label = text[label_start + 1:label_end]
+        # The value is after the label end (ignoring curly brace)
+        value, timestamp = _parse_value_and_timestamp(text[label_end + 1:])
+        return Sample(name, _parse_labels(label), value, timestamp)
+
+    # We don't have labels
+    except ValueError:
+        # Detect what separator is used
+        separator = " "
+        if separator not in text:
+            separator = "\t"
+        name_end = text.index(separator)
+        name = text[:name_end]
+        # The value is after the name
+        value, timestamp = _parse_value_and_timestamp(text[name_end:])
        return Sample(name, {}, value, timestamp)
-    name = text[:label_start].strip()
-    label_end = _next_unquoted_char(text[label_start:], '}') + label_start
-    labels = parse_labels(text[label_start + 1:label_end], False)
-    if not name:
-        # Name might be in the labels
-        if '__name__' not in labels:
-            raise ValueError
-        name = labels['__name__']
-        del labels['__name__']
-    elif '__name__' in labels:
-        raise ValueError("metric name specified more than once")
-    # Parsing labels succeeded, continue parsing the remaining text
-    remaining_text = text[label_end + 1:]
-    value, timestamp = _parse_value_and_timestamp(remaining_text)
-    return Sample(name, labels, value, timestamp)


 def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
@@ -306,38 +168,28 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
        line = line.strip()

        if line.startswith('#'):
-            parts = _split_quoted(line, None, 3)
+            parts = line.split(None, 3)
            if len(parts) < 2:
                continue
-            candidate_name, quoted = '', False
-            if len(parts) > 2:
-                # Ignore comment tokens
-                if parts[1] != 'TYPE' and parts[1] != 'HELP':
-                    continue
-                candidate_name, quoted = _unquote_unescape(parts[2])
-                if not quoted and not _is_valid_legacy_metric_name(candidate_name):
-                    raise ValueError
            if parts[1] == 'HELP':
-                if candidate_name != name:
+                if parts[2] != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
-                    name = candidate_name
+                    name = parts[2]
                    typ = 'untyped'
                    samples = []
-                    allowed_names = [candidate_name]
+                    allowed_names = [parts[2]]
                if len(parts) == 4:
                    documentation = _replace_help_escaping(parts[3])
                else:
                    documentation = ''
            elif parts[1] == 'TYPE':
-                if len(parts) < 4:
-                    raise ValueError
-                if candidate_name != name:
+                if parts[2] != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
-                    name = candidate_name
+                    name = parts[2]
                    documentation = ''
                    samples = []
                typ = parts[3]
@@ -348,6 +200,9 @@ def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
                    'histogram': ['_count', '_sum', '_bucket'],
                }.get(typ, [''])
                allowed_names = [name + n for n in allowed_names]
+            else:
+                # Ignore other comment tokens
+                pass
        elif line == '':
            # Ignore blank lines
            pass