API refactor
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-10-07 16:25:52 +09:00
parent 76d0d86211
commit 91c7e04474
1171 changed files with 81940 additions and 44117 deletions

View File

@@ -1,7 +1,7 @@
def to_string(s, encoding: str = "utf-8"):
    """
    Return ``s`` as text when it is a string-like value.

    Args:
        s: The value to convert.
        encoding: Codec used to decode ``bytes`` input.

    Returns:
        ``s`` unchanged when it is already a ``str``; the decoded text when
        it is ``bytes``; any other value is handed back untouched.
    """
    if isinstance(s, str):
        return s
    if isinstance(s, bytes):
        # "ignore" drops bytes that are invalid for the codec instead of raising.
        return s.decode(encoding, "ignore")
    return s  # Not a string we care about

View File

@@ -1,5 +1,7 @@
from typing import List, Union
from redis.commands.search.dialect import DEFAULT_DIALECT
FIELDNAME = object()
@@ -24,7 +26,7 @@ class Reducer:
NAME = None
def __init__(self, *args: List[str]) -> None:
def __init__(self, *args: str) -> None:
self._args = args
self._field = None
self._alias = None
@@ -110,9 +112,11 @@ class AggregateRequest:
self._with_schema = False
self._verbatim = False
self._cursor = []
self._dialect = None
self._dialect = DEFAULT_DIALECT
self._add_scores = False
self._scorer = "TFIDF"
def load(self, *fields: List[str]) -> "AggregateRequest":
def load(self, *fields: str) -> "AggregateRequest":
"""
Indicate the fields to be returned in the response. These fields are
returned in addition to any others implicitly specified.
@@ -219,7 +223,7 @@ class AggregateRequest:
self._aggregateplan.extend(_limit.build_args())
return self
def sort_by(self, *fields: List[str], **kwargs) -> "AggregateRequest":
def sort_by(self, *fields: str, **kwargs) -> "AggregateRequest":
"""
Indicate how the results should be sorted. This can also be used for
*top-N* style queries
@@ -292,6 +296,24 @@ class AggregateRequest:
self._with_schema = True
return self
def add_scores(self) -> "AggregateRequest":
    """
    If set, includes the score as an ordinary field of the row.

    :return: this request, so that calls can be chained
    """
    self._add_scores = True
    return self
def scorer(self, scorer: str) -> "AggregateRequest":
    """
    Use a different scoring function to evaluate document relevance.
    Default is `TFIDF`.

    :param scorer: The scoring function to use
                   (e.g. `TFIDF.DOCNORM` or `BM25`)
    :return: this request, so that calls can be chained
    """
    self._scorer = scorer
    return self
def verbatim(self) -> "AggregateRequest":
    """
    Mark the request so that the `VERBATIM` flag is added to the
    command arguments when they are built.

    :return: this request, so that calls can be chained
    """
    self._verbatim = True
    return self
@@ -315,12 +337,19 @@ class AggregateRequest:
if self._verbatim:
ret.append("VERBATIM")
if self._scorer:
ret.extend(["SCORER", self._scorer])
if self._add_scores:
ret.append("ADDSCORES")
if self._cursor:
ret += self._cursor
if self._loadall:
ret.append("LOAD")
ret.append("*")
elif self._loadfields:
ret.append("LOAD")
ret.append(str(len(self._loadfields)))

View File

@@ -2,13 +2,16 @@ import itertools
import time
from typing import Dict, List, Optional, Union
from redis.client import Pipeline
from redis.client import NEVER_DECODE, Pipeline
from redis.utils import deprecated_function
from ..helpers import get_protocol_version, parse_to_dict
from ..helpers import get_protocol_version
from ._util import to_string
from .aggregation import AggregateRequest, AggregateResult, Cursor
from .document import Document
from .field import Field
from .index_definition import IndexDefinition
from .profile_information import ProfileInformation
from .query import Query
from .result import Result
from .suggestion import SuggestionParser
@@ -20,7 +23,6 @@ ALTER_CMD = "FT.ALTER"
SEARCH_CMD = "FT.SEARCH"
ADD_CMD = "FT.ADD"
ADDHASH_CMD = "FT.ADDHASH"
DROP_CMD = "FT.DROP"
DROPINDEX_CMD = "FT.DROPINDEX"
EXPLAIN_CMD = "FT.EXPLAIN"
EXPLAINCLI_CMD = "FT.EXPLAINCLI"
@@ -32,7 +34,6 @@ SPELLCHECK_CMD = "FT.SPELLCHECK"
DICT_ADD_CMD = "FT.DICTADD"
DICT_DEL_CMD = "FT.DICTDEL"
DICT_DUMP_CMD = "FT.DICTDUMP"
GET_CMD = "FT.GET"
MGET_CMD = "FT.MGET"
CONFIG_CMD = "FT.CONFIG"
TAGVALS_CMD = "FT.TAGVALS"
@@ -65,7 +66,7 @@ class SearchCommands:
def _parse_results(self, cmd, res, **kwargs):
    """
    Turn the raw reply of a search command into the value given to the caller.

    Args:
        cmd: The command name the reply belongs to.
        res: The raw reply.
        **kwargs: Forwarded to the per-command callback on the non-protocol-3
            path.

    Returns:
        On protocol 3 the reply itself, wrapped in ``ProfileInformation``
        when ``cmd`` is ``"FT.PROFILE"``; otherwise whatever the callback
        registered for ``cmd`` in ``_RESP2_MODULE_CALLBACKS`` returns.
    """
    if get_protocol_version(self.client) in ["3", 3]:
        # Protocol 3 replies pass through unparsed; only FT.PROFILE is wrapped.
        return ProfileInformation(res) if cmd == "FT.PROFILE" else res
    return self._RESP2_MODULE_CALLBACKS[cmd](res, **kwargs)
@@ -80,6 +81,7 @@ class SearchCommands:
duration=kwargs["duration"],
has_payload=kwargs["query"]._with_payloads,
with_scores=kwargs["query"]._with_scores,
field_encodings=kwargs["query"]._return_fields_decode_as,
)
def _parse_aggregate(self, res, **kwargs):
@@ -98,7 +100,7 @@ class SearchCommands:
with_scores=query._with_scores,
)
return result, parse_to_dict(res[1])
return result, ProfileInformation(res[1])
def _parse_spellcheck(self, res, **kwargs):
corrections = {}
@@ -151,44 +153,43 @@ class SearchCommands:
def create_index(
self,
fields,
no_term_offsets=False,
no_field_flags=False,
stopwords=None,
definition=None,
fields: List[Field],
no_term_offsets: bool = False,
no_field_flags: bool = False,
stopwords: Optional[List[str]] = None,
definition: Optional[IndexDefinition] = None,
max_text_fields=False,
temporary=None,
no_highlight=False,
no_term_frequencies=False,
skip_initial_scan=False,
no_highlight: bool = False,
no_term_frequencies: bool = False,
skip_initial_scan: bool = False,
):
"""
Create the search index. The index must not already exist.
Creates the search index. The index must not already exist.
### Parameters:
For more information, see https://redis.io/commands/ft.create/
- **fields**: a list of TextField or NumericField objects
- **no_term_offsets**: If true, we will not save term offsets in
the index
- **no_field_flags**: If true, we will not save field flags that
allow searching in specific fields
- **stopwords**: If not None, we create the index with this custom
stopword list. The list can be empty
- **max_text_fields**: If true, we will encode indexes as if there
were more than 32 text fields which allows you to add additional
fields (beyond 32).
- **temporary**: Create a lightweight temporary index which will
expire after the specified period of inactivity (in seconds). The
internal idle timer is reset whenever the index is searched or added to.
- **no_highlight**: If true, disabling highlighting support.
Also implied by no_term_offsets.
- **no_term_frequencies**: If true, we avoid saving the term frequencies
in the index.
- **skip_initial_scan**: If true, we do not scan and index.
For more information see `FT.CREATE <https://redis.io/commands/ft.create>`_.
""" # noqa
Args:
fields: A list of Field objects.
no_term_offsets: If `true`, term offsets will not be saved in the index.
no_field_flags: If true, field flags that allow searching in specific fields
will not be saved.
stopwords: If provided, the index will be created with this custom stopword
list. The list can be empty.
definition: If provided, the index will be created with this custom index
definition.
max_text_fields: If true, indexes will be encoded as if there were more than
32 text fields, allowing for additional fields beyond 32.
temporary: Creates a lightweight temporary index which will expire after the
specified period of inactivity. The internal idle timer is reset
whenever the index is searched or added to.
no_highlight: If true, disables highlighting support. Also implied by
`no_term_offsets`.
no_term_frequencies: If true, term frequencies will not be saved in the
index.
skip_initial_scan: If true, the initial scan and indexing will be skipped.
"""
args = [CREATE_CMD, self.index_name]
if definition is not None:
args += definition.args
@@ -252,8 +253,18 @@ class SearchCommands:
For more information see `FT.DROPINDEX <https://redis.io/commands/ft.dropindex>`_.
""" # noqa
delete_str = "DD" if delete_documents else ""
return self.execute_command(DROPINDEX_CMD, self.index_name, delete_str)
args = [DROPINDEX_CMD, self.index_name]
delete_str = (
"DD"
if isinstance(delete_documents, bool) and delete_documents is True
else ""
)
if delete_str:
args.append(delete_str)
return self.execute_command(*args)
def _add_document(
self,
@@ -335,30 +346,30 @@ class SearchCommands:
"""
Add a single document to the index.
### Parameters
Args:
- **doc_id**: the id of the saved document.
- **nosave**: if set to true, we just index the document, and don't
doc_id: the id of the saved document.
nosave: if set to true, we just index the document, and don't
save a copy of it. This means that searches will just
return ids.
- **score**: the document ranking, between 0.0 and 1.0
- **payload**: optional inner-index payload we can save for fast
i access in scoring functions
- **replace**: if True, and the document already is in the index,
we perform an update and reindex the document
- **partial**: if True, the fields specified will be added to the
score: the document ranking, between 0.0 and 1.0
payload: optional inner-index payload we can save for fast
access in scoring functions
replace: if True, and the document already is in the index,
we perform an update and reindex the document
partial: if True, the fields specified will be added to the
existing document.
This has the added benefit that any fields specified
with `no_index`
will not be reindexed again. Implies `replace`
- **language**: Specify the language used for document tokenization.
- **no_create**: if True, the document is only updated and reindexed
language: Specify the language used for document tokenization.
no_create: if True, the document is only updated and reindexed
if it already exists.
If the document does not exist, an error will be
returned. Implies `replace`
- **fields** kwargs dictionary of the document fields to be saved
and/or indexed.
NOTE: Geo points shoule be encoded as strings of "lon,lat"
fields: kwargs dictionary of the document fields to be saved
and/or indexed.
NOTE: Geo points should be encoded as strings of "lon,lat"
""" # noqa
return self._add_document(
doc_id,
@@ -393,6 +404,7 @@ class SearchCommands:
doc_id, conn=None, score=score, language=language, replace=replace
)
@deprecated_function(version="2.0.0", reason="deprecated since redisearch 2.0")
def delete_document(self, doc_id, conn=None, delete_actual_document=False):
"""
Delete a document from index
@@ -427,6 +439,7 @@ class SearchCommands:
return Document(id=id, **fields)
@deprecated_function(version="2.0.0", reason="deprecated since redisearch 2.0")
def get(self, *ids):
"""
Returns the full contents of multiple documents.
@@ -497,14 +510,19 @@ class SearchCommands:
For more information see `FT.SEARCH <https://redis.io/commands/ft.search>`_.
""" # noqa
args, query = self._mk_query_args(query, query_params=query_params)
st = time.time()
res = self.execute_command(SEARCH_CMD, *args)
st = time.monotonic()
options = {}
if get_protocol_version(self.client) not in ["3", 3]:
options[NEVER_DECODE] = True
res = self.execute_command(SEARCH_CMD, *args, **options)
if isinstance(res, Pipeline):
return res
return self._parse_results(
SEARCH_CMD, res, query=query, duration=(time.time() - st) * 1000.0
SEARCH_CMD, res, query=query, duration=(time.monotonic() - st) * 1000.0
)
def explain(
@@ -524,7 +542,7 @@ class SearchCommands:
def aggregate(
self,
query: Union[str, Query],
query: Union[AggregateRequest, Cursor],
query_params: Dict[str, Union[str, int, float]] = None,
):
"""
@@ -555,7 +573,7 @@ class SearchCommands:
)
def _get_aggregate_result(
self, raw: List, query: Union[str, Query, AggregateRequest], has_cursor: bool
self, raw: List, query: Union[AggregateRequest, Cursor], has_cursor: bool
):
if has_cursor:
if isinstance(query, Cursor):
@@ -578,7 +596,7 @@ class SearchCommands:
def profile(
self,
query: Union[str, Query, AggregateRequest],
query: Union[Query, AggregateRequest],
limited: bool = False,
query_params: Optional[Dict[str, Union[str, int, float]]] = None,
):
@@ -588,13 +606,13 @@ class SearchCommands:
### Parameters
**query**: This can be either an `AggregateRequest`, `Query` or string.
**query**: This can be either an `AggregateRequest` or `Query`.
**limited**: If set to True, removes details of reader iterator.
**query_params**: Define one or more value parameters.
Each parameter has a name and a value.
"""
st = time.time()
st = time.monotonic()
cmd = [PROFILE_CMD, self.index_name, ""]
if limited:
cmd.append("LIMITED")
@@ -613,20 +631,20 @@ class SearchCommands:
res = self.execute_command(*cmd)
return self._parse_results(
PROFILE_CMD, res, query=query, duration=(time.time() - st) * 1000.0
PROFILE_CMD, res, query=query, duration=(time.monotonic() - st) * 1000.0
)
def spellcheck(self, query, distance=None, include=None, exclude=None):
"""
Issue a spellcheck query
### Parameters
Args:
**query**: search query.
**distance***: the maximal Levenshtein distance for spelling
query: search query.
distance: the maximal Levenshtein distance for spelling
suggestions (default: 1, max: 4).
**include**: specifies an inclusion custom dictionary.
**exclude**: specifies an exclusion custom dictionary.
include: specifies an inclusion custom dictionary.
exclude: specifies an exclusion custom dictionary.
For more information see `FT.SPELLCHECK <https://redis.io/commands/ft.spellcheck>`_.
""" # noqa
@@ -684,6 +702,10 @@ class SearchCommands:
cmd = [DICT_DUMP_CMD, name]
return self.execute_command(*cmd)
@deprecated_function(
version="8.0.0",
reason="deprecated since Redis 8.0, call config_set from core module instead",
)
def config_set(self, option: str, value: str) -> bool:
"""Set runtime configuration option.
@@ -698,6 +720,10 @@ class SearchCommands:
raw = self.execute_command(*cmd)
return raw == "OK"
@deprecated_function(
version="8.0.0",
reason="deprecated since Redis 8.0, call config_get from core module instead",
)
def config_get(self, option: str) -> str:
"""Get runtime configuration option value.
@@ -924,19 +950,24 @@ class AsyncSearchCommands(SearchCommands):
For more information see `FT.SEARCH <https://redis.io/commands/ft.search>`_.
""" # noqa
args, query = self._mk_query_args(query, query_params=query_params)
st = time.time()
res = await self.execute_command(SEARCH_CMD, *args)
st = time.monotonic()
options = {}
if get_protocol_version(self.client) not in ["3", 3]:
options[NEVER_DECODE] = True
res = await self.execute_command(SEARCH_CMD, *args, **options)
if isinstance(res, Pipeline):
return res
return self._parse_results(
SEARCH_CMD, res, query=query, duration=(time.time() - st) * 1000.0
SEARCH_CMD, res, query=query, duration=(time.monotonic() - st) * 1000.0
)
async def aggregate(
self,
query: Union[str, Query],
query: Union[AggregateResult, Cursor],
query_params: Dict[str, Union[str, int, float]] = None,
):
"""
@@ -994,6 +1025,10 @@ class AsyncSearchCommands(SearchCommands):
return self._parse_results(SPELLCHECK_CMD, res)
@deprecated_function(
version="8.0.0",
reason="deprecated since Redis 8.0, call config_set from core module instead",
)
async def config_set(self, option: str, value: str) -> bool:
"""Set runtime configuration option.
@@ -1008,6 +1043,10 @@ class AsyncSearchCommands(SearchCommands):
raw = await self.execute_command(*cmd)
return raw == "OK"
@deprecated_function(
version="8.0.0",
reason="deprecated since Redis 8.0, call config_get from core module instead",
)
async def config_get(self, option: str) -> str:
"""Get runtime configuration option value.

View File

@@ -0,0 +1,3 @@
# Default dialect version for Search and Aggregate queries; used as the
# initial dialect of newly created query/request objects.
DEFAULT_DIALECT = 2

View File

@@ -4,6 +4,10 @@ from redis import DataError
class Field:
"""
A class representing a field in a document.
"""
NUMERIC = "NUMERIC"
TEXT = "TEXT"
WEIGHT = "WEIGHT"
@@ -13,6 +17,9 @@ class Field:
SORTABLE = "SORTABLE"
NOINDEX = "NOINDEX"
AS = "AS"
GEOSHAPE = "GEOSHAPE"
INDEX_MISSING = "INDEXMISSING"
INDEX_EMPTY = "INDEXEMPTY"
def __init__(
self,
@@ -20,8 +27,24 @@ class Field:
args: List[str] = None,
sortable: bool = False,
no_index: bool = False,
index_missing: bool = False,
index_empty: bool = False,
as_name: str = None,
):
"""
Create a new field object.
Args:
name: The name of the field.
args:
sortable: If `True`, the field will be sortable.
no_index: If `True`, the field will not be indexed.
index_missing: If `True`, it will be possible to search for documents that
have this field missing.
index_empty: If `True`, it will be possible to search for documents that
have this field empty.
as_name: If provided, this alias will be used for the field.
"""
if args is None:
args = []
self.name = name
@@ -33,6 +56,10 @@ class Field:
self.args_suffix.append(Field.SORTABLE)
if no_index:
self.args_suffix.append(Field.NOINDEX)
if index_missing:
self.args_suffix.append(Field.INDEX_MISSING)
if index_empty:
self.args_suffix.append(Field.INDEX_EMPTY)
if no_index and not sortable:
raise ValueError("Non-Sortable non-Indexable fields are ignored")
@@ -91,6 +118,21 @@ class NumericField(Field):
Field.__init__(self, name, args=[Field.NUMERIC], **kwargs)
class GeoShapeField(Field):
    """
    GeoShapeField is used to enable within/contain indexing/searching
    """

    # Coordinate-system tokens that may be passed as ``coord_system``.
    SPHERICAL = "SPHERICAL"
    FLAT = "FLAT"

    def __init__(self, name: str, coord_system=None, **kwargs):
        """
        Create a new geo-shape field.

        Args:
            name: The name of the field.
            coord_system: Optional coordinate-system token placed right
                after ``GEOSHAPE`` (e.g. ``GeoShapeField.SPHERICAL`` or
                ``GeoShapeField.FLAT``). Left out when falsy.
            **kwargs: Forwarded unchanged to ``Field.__init__``.
        """
        args = [Field.GEOSHAPE]
        if coord_system:
            args.append(coord_system)
        Field.__init__(self, name, args=args, **kwargs)
class GeoField(Field):
"""
GeoField is used to define a geo-indexing field in a schema definition
@@ -139,7 +181,7 @@ class VectorField(Field):
``name`` is the name of the field.
``algorithm`` can be "FLAT" or "HNSW".
``algorithm`` can be "FLAT", "HNSW", or "SVS-VAMANA".
``attributes`` each algorithm can have specific attributes. Some of them
are mandatory and some of them are optional. See
@@ -152,10 +194,10 @@ class VectorField(Field):
if sort or noindex:
raise DataError("Cannot set 'sortable' or 'no_index' in Vector fields.")
if algorithm.upper() not in ["FLAT", "HNSW"]:
if algorithm.upper() not in ["FLAT", "HNSW", "SVS-VAMANA"]:
raise DataError(
"Realtime vector indexing supporting 2 Indexing Methods:"
"'FLAT' and 'HNSW'."
"Realtime vector indexing supporting 3 Indexing Methods:"
"'FLAT', 'HNSW', and 'SVS-VAMANA'."
)
attr_li = []

View File

@@ -0,0 +1,14 @@
from typing import Any
class ProfileInformation:
    """
    Wrapper around FT.PROFILE response
    """

    def __init__(self, info: Any) -> None:
        # Kept exactly as received; nothing is parsed or copied here.
        self._info: Any = info

    @property
    def info(self) -> Any:
        """The wrapped response, exactly as it was passed to the constructor."""
        return self._info

    def __repr__(self) -> str:
        return f"{type(self).__name__}(info={self._info!r})"

View File

@@ -1,5 +1,7 @@
from typing import List, Optional, Union
from redis.commands.search.dialect import DEFAULT_DIALECT
class Query:
"""
@@ -35,11 +37,12 @@ class Query:
self._in_order: bool = False
self._sortby: Optional[SortbyField] = None
self._return_fields: List = []
self._return_fields_decode_as: dict = {}
self._summarize_fields: List = []
self._highlight_fields: List = []
self._language: Optional[str] = None
self._expander: Optional[str] = None
self._dialect: Optional[int] = None
self._dialect: int = DEFAULT_DIALECT
def query_string(self) -> str:
"""Return the query string of this query only."""
@@ -53,13 +56,27 @@ class Query:
def return_fields(self, *fields) -> "Query":
    """
    Add fields to return fields.

    Each name is registered through `return_field`, one call per name in
    the order given, using that method's default options.
    """
    for field in fields:
        self.return_field(field)
    return self
def return_field(
    self,
    field: str,
    as_field: Optional[str] = None,
    decode_field: Optional[bool] = True,
    encoding: Optional[str] = "utf8",
) -> "Query":
    """
    Add a field to the list of fields to return.

    - **field**: The field to include in query results
    - **as_field**: The alias for the field
    - **decode_field**: Whether to decode the field from bytes to string
    - **encoding**: The encoding to use when decoding the field
    """
    # None means "hand the value back undecoded".
    decode_as = encoding if decode_field else None
    self._return_fields.append(field)
    self._return_fields_decode_as[field] = decode_as
    if as_field is not None:
        self._return_fields += ("AS", as_field)
        # The decode preference is looked up by the name that appears in the
        # reply, which for an aliased field is the alias, so record it under
        # that name as well.
        self._return_fields_decode_as[as_field] = decode_as
    return self
@@ -162,6 +179,8 @@ class Query:
Use a different scoring function to evaluate document relevance.
Default is `TFIDF`.
Since Redis 8.0 default was changed to BM25STD.
:param scorer: The scoring function to use
(e.g. `TFIDF.DOCNORM` or `BM25`)
"""

View File

@@ -1,3 +1,5 @@
from typing import Optional
from ._util import to_string
from .document import Document
@@ -9,11 +11,19 @@ class Result:
"""
def __init__(
self, res, hascontent, duration=0, has_payload=False, with_scores=False
self,
res,
hascontent,
duration=0,
has_payload=False,
with_scores=False,
field_encodings: Optional[dict] = None,
):
"""
- **snippets**: An optional dictionary of the form
{field: snippet_size} for snippet formatting
- duration: the execution time of the query
- has_payload: whether the query has payloads
- with_scores: whether the query has scores
- field_encodings: a dictionary of field encodings if any is provided
"""
self.total = res[0]
@@ -39,18 +49,22 @@ class Result:
fields = {}
if hascontent and res[i + fields_offset] is not None:
fields = (
dict(
dict(
zip(
map(to_string, res[i + fields_offset][::2]),
map(to_string, res[i + fields_offset][1::2]),
)
)
)
if hascontent
else {}
)
keys = map(to_string, res[i + fields_offset][::2])
values = res[i + fields_offset][1::2]
for key, value in zip(keys, values):
if field_encodings is None or key not in field_encodings:
fields[key] = to_string(value)
continue
encoding = field_encodings[key]
# If the encoding is None, we don't need to decode the value
if encoding is None:
fields[key] = value
else:
fields[key] = to_string(value, encoding=encoding)
try:
del fields["id"]
except KeyError: