@@ -5,7 +5,7 @@ import logging
import socket
import time

from kafka.errors import KafkaConfigurationError, UnsupportedVersionError
from kafka.errors import KafkaConfigurationError, KafkaTimeoutError, UnsupportedVersionError

from kafka.vendor import six

@@ -16,8 +16,9 @@ from kafka.coordinator.consumer import ConsumerCoordinator
from kafka.coordinator.assignors.range import RangePartitionAssignor
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
from kafka.metrics import MetricConfig, Metrics
from kafka.protocol.offset import OffsetResetStrategy
from kafka.structs import TopicPartition
from kafka.protocol.list_offsets import OffsetResetStrategy
from kafka.structs import OffsetAndMetadata, TopicPartition
from kafka.util import Timer
from kafka.version import __version__

log = logging.getLogger(__name__)
@@ -60,6 +61,8 @@ class KafkaConsumer(six.Iterator):
raw message key and returns a deserialized key.
value_deserializer (callable): Any callable that takes a
raw message value and returns a deserialized value.
enable_incremental_fetch_sessions: (bool): Use incremental fetch sessions
when available / supported by kafka broker. See KIP-227. Default: True.
fetch_min_bytes (int): Minimum amount of data the server should
return for a fetch request, otherwise wait up to
fetch_max_wait_ms for more data to accumulate. Default: 1.
@@ -98,7 +101,7 @@ class KafkaConsumer(six.Iterator):
reconnection attempts will continue periodically with this fixed
rate. To avoid connection storms, a randomization factor of 0.2
will be applied to the backoff resulting in a random range between
20% below and 20% above the computed value. Default: 1000.
20% below and 20% above the computed value. Default: 30000.
max_in_flight_requests_per_connection (int): Requests are pipelined
to kafka brokers up to this number of maximum requests per
broker connection. Default: 5.
@@ -118,6 +121,12 @@ class KafkaConsumer(six.Iterator):
consumed. This ensures no on-the-wire or on-disk corruption to
the messages occurred. This check adds some overhead, so it may
be disabled in cases seeking extreme performance. Default: True
isolation_level (str): Configure KIP-98 transactional consumer by
setting to 'read_committed'. This will cause the consumer to
skip records from aborted transactions. Default: 'read_uncommitted'
allow_auto_create_topics (bool): Enable/disable auto topic creation
on metadata request. Only available with api_version >= (0, 11).
Default: True
metadata_max_age_ms (int): The period of time in milliseconds after
which we force a refresh of metadata, even if we haven't seen any
partition leadership changes to proactively discover any new
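A minimal usage sketch for the consumer options documented above (the broker address and topic name are illustrative):

    from kafka import KafkaConsumer

    # 'read_committed' skips records from aborted transactions (KIP-98);
    # incremental fetch sessions (KIP-227) are on by default when the broker supports them.
    consumer = KafkaConsumer(
        'my-topic',
        bootstrap_servers='localhost:9092',
        isolation_level='read_committed',
        allow_auto_create_topics=False,
        enable_incremental_fetch_sessions=True,
    )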
@@ -195,10 +204,17 @@ class KafkaConsumer(six.Iterator):
or other configuration forbids use of all the specified ciphers),
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
api_version (tuple): Specify which Kafka API version to use. If set to
None, the client will attempt to infer the broker version by probing
various APIs. Different versions enable different functionality.
None, the client will attempt to determine the broker version via
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
various known APIs. Dynamic version checking is performed eagerly
during __init__ and can raise NoBrokersAvailableError if no connection
was made before timeout (see api_version_auto_timeout_ms below).
Different versions enable different functionality.

Examples:
(3, 9) most recent broker release, enable all supported features
(0, 11) enables message format v2 (internal)
(0, 10, 0) enables sasl authentication and message format v1
(0, 9) enables full group coordination features with automatic
partition assignment and rebalancing,
(0, 8, 2) enables kafka-storage offset commits with manual
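A sketch of the two ways to handle broker version selection described above (broker address and values are illustrative):

    from kafka import KafkaConsumer

    # Pin the API version to skip broker probing entirely:
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(3, 9))

    # Or leave api_version=None (the default) and bound the eager version check;
    # NoBrokersAvailableError is raised if no broker answers within the timeout.
    consumer = KafkaConsumer(
        bootstrap_servers='localhost:9092',
        api_version=None,
        api_version_auto_timeout_ms=5000,
    )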
@@ -212,6 +228,7 @@ class KafkaConsumer(six.Iterator):
api_version_auto_timeout_ms (int): number of milliseconds to throw a
timeout exception from the constructor when checking the broker
api version. Only applies if api_version set to None.
Default: 2000
connections_max_idle_ms: Close idle connections after the number of
milliseconds specified by this config. The broker closes idle
connections after connections.max.idle.ms, so this avoids hitting
@@ -220,6 +237,7 @@ class KafkaConsumer(six.Iterator):
metric_reporters (list): A list of classes to use as metrics reporters.
Implementing the AbstractMetricsReporter interface allows plugging
in classes that will be notified of new metric creation. Default: []
metrics_enabled (bool): Whether to track metrics on this instance. Default True.
metrics_num_samples (int): The number of samples maintained to compute
metrics. Default: 2
metrics_sample_window_ms (int): The maximum age in milliseconds of
@@ -238,12 +256,17 @@ class KafkaConsumer(six.Iterator):
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
sasl_kerberos_domain name are ignored. Default: None.
sasl_kerberos_service_name (str): Service name to include in GSSAPI
sasl mechanism handshake. Default: 'kafka'
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
sasl mechanism handshake. Default: one of bootstrap servers
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
instance. (See kafka.oauth.abstract). Default: None
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
token provider instance. Default: None
socks5_proxy (str): Socks5 proxy URL. Default: None
kafka_client (callable): Custom class / callable for creating KafkaClient instances

Note:
Configuration parameters are described in more detail at
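A sketch of wiring an OAuthBearer token provider through the relocated kafka.sasl.oauth.AbstractTokenProvider interface documented above; the token() method and the static token string are assumptions for illustration:

    from kafka import KafkaConsumer
    from kafka.sasl.oauth import AbstractTokenProvider

    class StaticTokenProvider(AbstractTokenProvider):
        # Assumed interface: token() returns the OAuthBearer token as a string.
        def token(self):
            return 'my-static-token'

    consumer = KafkaConsumer(
        'my-topic',
        bootstrap_servers='localhost:9092',
        security_protocol='SASL_SSL',
        sasl_mechanism='OAUTHBEARER',
        sasl_oauth_token_provider=StaticTokenProvider(),
    )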
@@ -255,6 +278,7 @@ class KafkaConsumer(six.Iterator):
'group_id': None,
'key_deserializer': None,
'value_deserializer': None,
'enable_incremental_fetch_sessions': True,
'fetch_max_wait_ms': 500,
'fetch_min_bytes': 1,
'fetch_max_bytes': 52428800,
@@ -262,13 +286,15 @@ class KafkaConsumer(six.Iterator):
'request_timeout_ms': 305000, # chosen to be higher than the default of max_poll_interval_ms
'retry_backoff_ms': 100,
'reconnect_backoff_ms': 50,
'reconnect_backoff_max_ms': 1000,
'reconnect_backoff_max_ms': 30000,
'max_in_flight_requests_per_connection': 5,
'auto_offset_reset': 'latest',
'enable_auto_commit': True,
'auto_commit_interval_ms': 5000,
'default_offset_commit_callback': lambda offsets, response: True,
'check_crcs': True,
'isolation_level': 'read_uncommitted',
'allow_auto_create_topics': True,
'metadata_max_age_ms': 5 * 60 * 1000,
'partition_assignment_strategy': (RangePartitionAssignor, RoundRobinPartitionAssignor),
'max_poll_records': 500,
@@ -294,6 +320,7 @@ class KafkaConsumer(six.Iterator):
'api_version_auto_timeout_ms': 2000,
'connections_max_idle_ms': 9 * 60 * 1000,
'metric_reporters': [],
'metrics_enabled': True,
'metrics_num_samples': 2,
'metrics_sample_window_ms': 30000,
'metric_group_prefix': 'consumer',
@@ -302,10 +329,12 @@ class KafkaConsumer(six.Iterator):
'sasl_mechanism': None,
'sasl_plain_username': None,
'sasl_plain_password': None,
'sasl_kerberos_name': None,
'sasl_kerberos_service_name': 'kafka',
'sasl_kerberos_domain_name': None,
'sasl_oauth_token_provider': None,
'legacy_iterator': False, # enable to revert to < 1.4.7 iterator
'socks5_proxy': None,
'kafka_client': KafkaClient,
}
DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000

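Any of the DEFAULT_CONFIG keys above can be overridden as constructor keyword arguments; a sketch using values touched by this diff (the subclass and broker address are illustrative):

    from kafka import KafkaConsumer
    from kafka.client_async import KafkaClient

    class LoggingKafkaClient(KafkaClient):
        """Illustrative subclass injected via the new 'kafka_client' config."""
        pass

    consumer = KafkaConsumer(
        bootstrap_servers='localhost:9092',
        reconnect_backoff_max_ms=30000,   # matches the new default above
        metrics_enabled=False,            # skip metric collection entirely
        kafka_client=LoggingKafkaClient,
    )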
@@ -335,13 +364,15 @@ class KafkaConsumer(six.Iterator):
"fetch_max_wait_ms ({})."
.format(connections_max_idle_ms, request_timeout_ms, fetch_max_wait_ms))

metrics_tags = {'client-id': self.config['client_id']}
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
time_window_ms=self.config['metrics_sample_window_ms'],
tags=metrics_tags)
reporters = [reporter() for reporter in self.config['metric_reporters']]
self._metrics = Metrics(metric_config, reporters)
# TODO _metrics likely needs to be passed to KafkaClient, etc.
if self.config['metrics_enabled']:
metrics_tags = {'client-id': self.config['client_id']}
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
time_window_ms=self.config['metrics_sample_window_ms'],
tags=metrics_tags)
reporters = [reporter() for reporter in self.config['metric_reporters']]
self._metrics = Metrics(metric_config, reporters)
else:
self._metrics = None

# api_version was previously a str. Accept old format for now
if isinstance(self.config['api_version'], str):
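With the guard above, disabling metrics leaves self._metrics as None; a sketch of the caller-side effect (broker address illustrative):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', metrics_enabled=False)
    # metrics() returns early (None) when metrics are disabled -- see the metrics() hunk later in this diff.
    assert consumer.metrics() is None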
@@ -353,11 +384,10 @@ class KafkaConsumer(six.Iterator):
log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
str(self.config['api_version']), str_version)

self._client = KafkaClient(metrics=self._metrics, **self.config)
self._client = self.config['kafka_client'](metrics=self._metrics, **self.config)

# Get auto-discovered version from client if necessary
if self.config['api_version'] is None:
self.config['api_version'] = self._client.config['api_version']
# Get auto-discovered / normalized version from client
self.config['api_version'] = self._client.config['api_version']

# Coordinator configurations are different for older brokers
# max_poll_interval_ms is not supported directly -- it must the be
@@ -380,9 +410,9 @@ class KafkaConsumer(six.Iterator):

self._subscription = SubscriptionState(self.config['auto_offset_reset'])
self._fetcher = Fetcher(
self._client, self._subscription, self._metrics, **self.config)
self._client, self._subscription, metrics=self._metrics, **self.config)
self._coordinator = ConsumerCoordinator(
self._client, self._subscription, self._metrics,
self._client, self._subscription, metrics=self._metrics,
assignors=self.config['partition_assignment_strategy'],
**self.config)
self._closed = False
@@ -422,8 +452,15 @@ class KafkaConsumer(six.Iterator):
no rebalance operation triggered when group membership or cluster
and topic metadata change.
"""
self._subscription.assign_from_user(partitions)
self._client.set_topics([tp.topic for tp in partitions])
if not partitions:
self.unsubscribe()
else:
# make sure the offsets of topic partitions the consumer is unsubscribing from
# are committed since there will be no following rebalance
self._coordinator.maybe_auto_commit_offsets_now()
self._subscription.assign_from_user(partitions)
self._client.set_topics([tp.topic for tp in partitions])
log.debug("Subscribed to partition(s): %s", partitions)

def assignment(self):
"""Get the TopicPartitions currently assigned to this consumer.
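A sketch of manual assignment with the updated assign() behavior above (topic and partition numbers are illustrative); note that an empty collection now routes through unsubscribe():

    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    consumer.assign([TopicPartition('my-topic', 0), TopicPartition('my-topic', 1)])
    print(consumer.assignment())

    # Passing an empty collection is now equivalent to unsubscribing.
    consumer.assign([])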
@@ -441,20 +478,23 @@ class KafkaConsumer(six.Iterator):
"""
return self._subscription.assigned_partitions()

def close(self, autocommit=True):
def close(self, autocommit=True, timeout_ms=None):
"""Close the consumer, waiting indefinitely for any needed cleanup.

Keyword Arguments:
autocommit (bool): If auto-commit is configured for this consumer,
this optional flag causes the consumer to attempt to commit any
pending consumed offsets prior to close. Default: True
timeout_ms (num, optional): Milliseconds to wait for auto-commit.
Default: None
"""
if self._closed:
return
log.debug("Closing the KafkaConsumer.")
self._closed = True
self._coordinator.close(autocommit=autocommit)
self._metrics.close()
self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms)
if self._metrics:
self._metrics.close()
self._client.close()
try:
self.config['key_deserializer'].close()
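A sketch of the new bounded shutdown path (broker, group, and timeout values are illustrative):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer('my-topic', bootstrap_servers='localhost:9092', group_id='my-group')
    try:
        for message in consumer:
            print(message.offset, message.value)
            break
    finally:
        # Attempt a final auto-commit, but give up after ~5 seconds instead of blocking indefinitely.
        consumer.close(autocommit=True, timeout_ms=5000)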
@@ -500,7 +540,7 @@ class KafkaConsumer(six.Iterator):
offsets, callback=callback)
return future

def commit(self, offsets=None):
def commit(self, offsets=None, timeout_ms=None):
"""Commit offsets to kafka, blocking until success or error.

This commits offsets only to Kafka. The offsets committed using this API
@@ -524,17 +564,16 @@ class KafkaConsumer(six.Iterator):
assert self.config['group_id'] is not None, 'Requires group_id'
if offsets is None:
offsets = self._subscription.all_consumed_offsets()
self._coordinator.commit_offsets_sync(offsets)
self._coordinator.commit_offsets_sync(offsets, timeout_ms=timeout_ms)

def committed(self, partition, metadata=False):
def committed(self, partition, metadata=False, timeout_ms=None):
"""Get the last committed offset for the given partition.

This offset will be used as the position for the consumer
in the event of a failure.

This call may block to do a remote call if the partition in question
isn't assigned to this consumer or if the consumer hasn't yet
initialized its cache of committed offsets.
This call will block to do a remote call to get the latest committed
offsets from the server.

Arguments:
partition (TopicPartition): The partition to check.
@@ -543,28 +582,19 @@ class KafkaConsumer(six.Iterator):

Returns:
The last committed offset (int or OffsetAndMetadata), or None if there was no prior commit.

Raises:
KafkaTimeoutError if timeout_ms provided
BrokerResponseErrors if OffsetFetchRequest raises an error.
"""
assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1'
assert self.config['group_id'] is not None, 'Requires group_id'
if not isinstance(partition, TopicPartition):
raise TypeError('partition must be a TopicPartition namedtuple')
if self._subscription.is_assigned(partition):
committed = self._subscription.assignment[partition].committed
if committed is None:
self._coordinator.refresh_committed_offsets_if_needed()
committed = self._subscription.assignment[partition].committed
else:
commit_map = self._coordinator.fetch_committed_offsets([partition])
if partition in commit_map:
committed = commit_map[partition]
else:
committed = None

if committed is not None:
if metadata:
return committed
else:
return committed.offset
committed = self._coordinator.fetch_committed_offsets([partition], timeout_ms=timeout_ms)
if partition not in committed:
return None
return committed[partition] if metadata else committed[partition].offset

def _fetch_all_topic_metadata(self):
"""A blocking call that fetches topic metadata for all topics in the
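A sketch of the bounded synchronous commit and committed-offset lookup introduced above (topic, group, offset, and timeout values are illustrative); a KafkaTimeoutError may propagate when timeout_ms is given:

    from kafka import KafkaConsumer
    from kafka.errors import KafkaTimeoutError
    from kafka.structs import OffsetAndMetadata, TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='my-group')
    tp = TopicPartition('my-topic', 0)
    consumer.assign([tp])

    try:
        # Commit an explicit offset map; the third field is the leader epoch (-1 = unknown).
        consumer.commit({tp: OffsetAndMetadata(42, '', -1)}, timeout_ms=5000)
        print(consumer.committed(tp, timeout_ms=5000))  # -> 42, or None if nothing was committed
    except KafkaTimeoutError:
        print('commit/lookup did not complete in time')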
@@ -609,7 +639,7 @@ class KafkaConsumer(six.Iterator):
if partitions is None:
self._fetch_all_topic_metadata()
partitions = cluster.partitions_for_topic(topic)
return partitions
return partitions or set()

def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
"""Fetch data from assigned topics / partitions.
@@ -649,82 +679,88 @@ class KafkaConsumer(six.Iterator):
assert not self._closed, 'KafkaConsumer is closed'

# Poll for new data until the timeout expires
start = time.time()
remaining = timeout_ms
while True:
records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
timer = Timer(timeout_ms)
while not self._closed:
records = self._poll_once(timer, max_records, update_offsets=update_offsets)
if records:
return records
elif timer.expired:
break
return {}

elapsed_ms = (time.time() - start) * 1000
remaining = timeout_ms - elapsed_ms

if remaining <= 0:
return {}

def _poll_once(self, timeout_ms, max_records, update_offsets=True):
def _poll_once(self, timer, max_records, update_offsets=True):
"""Do one round of polling. In addition to checking for new data, this does
any needed heart-beating, auto-commits, and offset updates.

Arguments:
timeout_ms (int): The maximum time in milliseconds to block.
timer (Timer): The maximum time in milliseconds to block.

Returns:
dict: Map of topic to list of records (may be empty).
"""
self._coordinator.poll()
if not self._coordinator.poll(timeout_ms=timer.timeout_ms):
log.debug('poll: timeout during coordinator.poll(); returning early')
return {}

# Fetch positions if we have partitions we're subscribed to that we
# don't know the offset for
if not self._subscription.has_all_fetch_positions():
self._update_fetch_positions(self._subscription.missing_fetch_positions())
has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms)

# If data is available already, e.g. from a previous network client
# poll() call to commit, then just return it immediately
records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
log.debug('poll: fetched records: %s, %s', records, partial)
# Before returning the fetched records, we can send off the
# next round of fetches and avoid block waiting for their
# responses to enable pipelining while the user is handling the
# fetched records.
if not partial:
log.debug("poll: Sending fetches")
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)

if records:
# Before returning the fetched records, we can send off the
# next round of fetches and avoid block waiting for their
# responses to enable pipelining while the user is handling the
# fetched records.
if not partial:
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)
return records

# Send any new fetches (won't resend pending fetches)
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)
# We do not want to be stuck blocking in poll if we are missing some positions
# since the offset lookup may be backing off after a failure
poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000)
if not has_all_fetch_positions:
log.debug('poll: do not have all fetch positions...')
poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])

timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000)
self._client.poll(timeout_ms=timeout_ms)
self._client.poll(timeout_ms=poll_timeout_ms)
# after the long poll, we should check whether the group needs to rebalance
# prior to returning data so that the group can stabilize faster
if self._coordinator.need_rejoin():
log.debug('poll: coordinator needs rejoin; returning early')
return {}

records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
return records

def position(self, partition):
def position(self, partition, timeout_ms=None):
"""Get the offset of the next record that will be fetched

Arguments:
partition (TopicPartition): Partition to check

Returns:
int: Offset
int: Offset or None
"""
if not isinstance(partition, TopicPartition):
raise TypeError('partition must be a TopicPartition namedtuple')
assert self._subscription.is_assigned(partition), 'Partition is not assigned'
offset = self._subscription.assignment[partition].position
if offset is None:
self._update_fetch_positions([partition])
offset = self._subscription.assignment[partition].position
return offset

timer = Timer(timeout_ms)
position = self._subscription.assignment[partition].position
while position is None:
# batch update fetch positions for any partitions without a valid position
if self._update_fetch_positions(timeout_ms=timer.timeout_ms):
position = self._subscription.assignment[partition].position
elif timer.expired:
return None
else:
return position.offset

def highwater(self, partition):
"""Last known highwater offset for a partition.
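A sketch of the timeout-aware poll loop and position lookup above (topic, group, and timeouts illustrative); poll() returns an empty dict when the timer expires without data:

    from kafka import KafkaConsumer

    consumer = KafkaConsumer('my-topic', bootstrap_servers='localhost:9092', group_id='my-group')

    records = consumer.poll(timeout_ms=1000, max_records=100)
    for tp, messages in records.items():
        print(tp, len(messages))

    for tp in consumer.assignment():
        # position() can now give up instead of blocking forever; None means no position yet.
        print(tp, consumer.position(tp, timeout_ms=2000))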
@@ -818,8 +854,7 @@ class KafkaConsumer(six.Iterator):
assert partition in self._subscription.assigned_partitions(), 'Unassigned partition'
log.debug("Seeking to offset %s for partition %s", offset, partition)
self._subscription.assignment[partition].seek(offset)
if not self.config['legacy_iterator']:
self._iterator = None
self._iterator = None

def seek_to_beginning(self, *partitions):
"""Seek to the oldest available offset for partitions.
@@ -843,9 +878,8 @@ class KafkaConsumer(six.Iterator):

for tp in partitions:
log.debug("Seeking to beginning of partition %s", tp)
self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST)
if not self.config['legacy_iterator']:
self._iterator = None
self._subscription.request_offset_reset(tp, OffsetResetStrategy.EARLIEST)
self._iterator = None

def seek_to_end(self, *partitions):
"""Seek to the most recent available offset for partitions.
@@ -869,9 +903,8 @@ class KafkaConsumer(six.Iterator):

for tp in partitions:
log.debug("Seeking to end of partition %s", tp)
self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST)
if not self.config['legacy_iterator']:
self._iterator = None
self._subscription.request_offset_reset(tp, OffsetResetStrategy.LATEST)
self._iterator = None

def subscribe(self, topics=(), pattern=None, listener=None):
"""Subscribe to a list of topics, or a topic regex pattern.
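A sketch of seeking with manually assigned partitions (topic and offsets are illustrative):

    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    tp = TopicPartition('my-topic', 0)
    consumer.assign([tp])

    consumer.seek_to_beginning(tp)   # replay from the oldest available offset
    consumer.seek_to_end(tp)         # skip ahead to only new records
    consumer.seek(tp, 42)            # or jump to an absolute offset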
@@ -942,13 +975,16 @@ class KafkaConsumer(six.Iterator):

def unsubscribe(self):
"""Unsubscribe from all topics and clear all assigned partitions."""
# make sure the offsets of topic partitions the consumer is unsubscribing from
# are committed since there will be no following rebalance
self._coordinator.maybe_auto_commit_offsets_now()
self._subscription.unsubscribe()
self._coordinator.close()
if self.config['api_version'] >= (0, 9):
self._coordinator.maybe_leave_group()
self._client.cluster.need_all_topic_metadata = False
self._client.set_topics([])
log.debug("Unsubscribed all topics or patterns and assigned partitions")
if not self.config['legacy_iterator']:
self._iterator = None
self._iterator = None

def metrics(self, raw=False):
"""Get metrics on consumer performance.
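A sketch of the subscribe/unsubscribe lifecycle affected above (topic and group names illustrative); pending auto-commit offsets are now committed before leaving the group:

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='my-group', enable_auto_commit=True)
    consumer.subscribe(['my-topic'])
    consumer.poll(timeout_ms=1000)

    # Commits any pending auto-commit offsets, leaves the group (brokers >= 0.9), and clears topics.
    consumer.unsubscribe()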
@@ -960,6 +996,8 @@ class KafkaConsumer(six.Iterator):
This is an unstable interface. It may change in future
releases without warning.
"""
if not self._metrics:
return
if raw:
return self._metrics.metrics.copy()
@@ -1015,7 +1053,7 @@ class KafkaConsumer(six.Iterator):
raise ValueError(
"The target time for partition {} is {}. The target time "
"cannot be negative.".format(tp, ts))
return self._fetcher.get_offsets_by_times(
return self._fetcher.offsets_by_times(
timestamps, self.config['request_timeout_ms'])

def beginning_offsets(self, partitions):
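A sketch of the public offsets_for_times() call that sits above the renamed fetcher method (topic and timestamp are illustrative):

    import time
    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    tp = TopicPartition('my-topic', 0)

    one_hour_ago = int(time.time() * 1000) - 3600 * 1000
    offsets = consumer.offsets_for_times({tp: one_hour_ago})
    # Each value is an OffsetAndTimestamp (or None if no message is that recent).
    print(offsets[tp])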
@@ -1081,7 +1119,7 @@ class KafkaConsumer(six.Iterator):
return False
return True

def _update_fetch_positions(self, partitions):
def _update_fetch_positions(self, timeout_ms=None):
"""Set the fetch position to the committed position (if there is one)
or reset it using the offset reset policy the user has configured.
@@ -1089,30 +1127,36 @@ class KafkaConsumer(six.Iterator):
partitions (List[TopicPartition]): The partitions that need
updating fetch positions.

Returns True if fetch positions updated, False if timeout or async reset is pending

Raises:
NoOffsetForPartitionError: If no offset is stored for a given
partition and no offset reset policy is defined.
"""
# Lookup any positions for partitions which are awaiting reset (which may be the
# case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do
# this check first to avoid an unnecessary lookup of committed offsets (which
# typically occurs when the user is manually assigning partitions and managing
# their own offsets).
self._fetcher.reset_offsets_if_needed(partitions)
if self._subscription.has_all_fetch_positions():
return True

if not self._subscription.has_all_fetch_positions():
# if we still don't have offsets for all partitions, then we should either seek
# to the last committed position or reset using the auto reset policy
if (self.config['api_version'] >= (0, 8, 1) and
self.config['group_id'] is not None):
# first refresh commits for all assigned partitions
self._coordinator.refresh_committed_offsets_if_needed()
if (self.config['api_version'] >= (0, 8, 1) and
self.config['group_id'] is not None):
# If there are any partitions which do not have a valid position and are not
# awaiting reset, then we need to fetch committed offsets. We will only do a
# coordinator lookup if there are partitions which have missing positions, so
# a consumer with manually assigned partitions can avoid a coordinator dependence
# by always ensuring that assigned partitions have an initial position.
if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms):
return False

# Then, do any offset lookups in case some positions are not known
self._fetcher.update_fetch_positions(partitions)
# If there are partitions still needing a position and a reset policy is defined,
# request reset using the default policy. If no reset strategy is defined and there
# are partitions with a missing position, then we will raise an exception.
self._subscription.reset_missing_positions()

# Finally send an asynchronous request to lookup and update the positions of any
# partitions which are awaiting reset.
return not self._fetcher.reset_offsets_if_needed()

def _message_generator_v2(self):
timeout_ms = 1000 * (self._consumer_timeout - time.time())
timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False)
for tp, records in six.iteritems(record_map):
# Generators are stateful, and it is possible that the tp / records
@@ -1127,72 +1171,15 @@ class KafkaConsumer(six.Iterator):
log.debug("Not returning fetched records for partition %s"
" since it is no longer fetchable", tp)
break
self._subscription.assignment[tp].position = record.offset + 1
self._subscription.assignment[tp].position = OffsetAndMetadata(record.offset + 1, '', -1)
yield record

def _message_generator(self):
assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment'
while time.time() < self._consumer_timeout:

self._coordinator.poll()

# Fetch offsets for any subscribed partitions that we arent tracking yet
if not self._subscription.has_all_fetch_positions():
partitions = self._subscription.missing_fetch_positions()
self._update_fetch_positions(partitions)

poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms'])
self._client.poll(timeout_ms=poll_ms)

# after the long poll, we should check whether the group needs to rebalance
# prior to returning data so that the group can stabilize faster
if self._coordinator.need_rejoin():
continue

# We need to make sure we at least keep up with scheduled tasks,
# like heartbeats, auto-commits, and metadata refreshes
timeout_at = self._next_timeout()

# Short-circuit the fetch iterator if we are already timed out
# to avoid any unintentional interaction with fetcher setup
if time.time() > timeout_at:
continue

for msg in self._fetcher:
yield msg
if time.time() > timeout_at:
log.debug("internal iterator timeout - breaking for poll")
break
self._client.poll(timeout_ms=0)

# An else block on a for loop only executes if there was no break
# so this should only be called on a StopIteration from the fetcher
# We assume that it is safe to init_fetches when fetcher is done
# i.e., there are no more records stored internally
else:
self._fetcher.send_fetches()

def _next_timeout(self):
timeout = min(self._consumer_timeout,
self._client.cluster.ttl() / 1000.0 + time.time(),
self._coordinator.time_to_next_poll() + time.time())
return timeout

def __iter__(self): # pylint: disable=non-iterator-returned
return self

def __next__(self):
if self._closed:
raise StopIteration('KafkaConsumer closed')
# Now that the heartbeat thread runs in the background
# there should be no reason to maintain a separate iterator
# but we'll keep it available for a few releases just in case
if self.config['legacy_iterator']:
return self.next_v1()
else:
return self.next_v2()

def next_v2(self):
self._set_consumer_timeout()
while time.time() < self._consumer_timeout:
if not self._iterator:
@@ -1203,17 +1190,6 @@ class KafkaConsumer(six.Iterator):
self._iterator = None
raise StopIteration()

def next_v1(self):
if not self._iterator:
self._iterator = self._message_generator()

self._set_consumer_timeout()
try:
return next(self._iterator)
except StopIteration:
self._iterator = None
raise

def _set_consumer_timeout(self):
# consumer_timeout_ms can be used to stop iteration early
if self.config['consumer_timeout_ms'] >= 0: