API refactor

2025-10-07 16:25:52 +09:00
parent 76d0d86211
commit 91c7e04474
1171 changed files with 81940 additions and 44117 deletions


@@ -5,7 +5,7 @@ import logging
import socket
import time
from kafka.errors import KafkaConfigurationError, UnsupportedVersionError
from kafka.errors import KafkaConfigurationError, KafkaTimeoutError, UnsupportedVersionError
from kafka.vendor import six
@@ -16,8 +16,9 @@ from kafka.coordinator.consumer import ConsumerCoordinator
from kafka.coordinator.assignors.range import RangePartitionAssignor
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
from kafka.metrics import MetricConfig, Metrics
from kafka.protocol.offset import OffsetResetStrategy
from kafka.structs import TopicPartition
from kafka.protocol.list_offsets import OffsetResetStrategy
from kafka.structs import OffsetAndMetadata, TopicPartition
from kafka.util import Timer
from kafka.version import __version__
log = logging.getLogger(__name__)
@@ -60,6 +61,8 @@ class KafkaConsumer(six.Iterator):
raw message key and returns a deserialized key.
value_deserializer (callable): Any callable that takes a
raw message value and returns a deserialized value.
enable_incremental_fetch_sessions (bool): Use incremental fetch sessions
when available / supported by kafka broker. See KIP-227. Default: True.
fetch_min_bytes (int): Minimum amount of data the server should
return for a fetch request, otherwise wait up to
fetch_max_wait_ms for more data to accumulate. Default: 1.
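As a usage sketch (not part of the commit), the new enable_incremental_fetch_sessions flag sits alongside the existing fetch tuning options; the broker address, topic, and group id below are placeholder assumptions.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'example-topic',                     # placeholder topic
    bootstrap_servers='localhost:9092',  # placeholder broker
    group_id='example-group',
    enable_incremental_fetch_sessions=True,  # KIP-227 fetch sessions (the new default)
    fetch_min_bytes=1,
    fetch_max_wait_ms=500,
)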
@@ -98,7 +101,7 @@ class KafkaConsumer(six.Iterator):
reconnection attempts will continue periodically with this fixed
rate. To avoid connection storms, a randomization factor of 0.2
will be applied to the backoff resulting in a random range between
20% below and 20% above the computed value. Default: 1000.
20% below and 20% above the computed value. Default: 30000.
max_in_flight_requests_per_connection (int): Requests are pipelined
to kafka brokers up to this number of maximum requests per
broker connection. Default: 5.
@@ -118,6 +121,12 @@ class KafkaConsumer(six.Iterator):
consumed. This ensures no on-the-wire or on-disk corruption to
the messages occurred. This check adds some overhead, so it may
be disabled in cases seeking extreme performance. Default: True
isolation_level (str): Configure KIP-98 transactional consumer by
setting to 'read_committed'. This will cause the consumer to
skip records from aborted transactions. Default: 'read_uncommitted'
allow_auto_create_topics (bool): Enable/disable auto topic creation
on metadata request. Only available with api_version >= (0, 11).
Default: True
metadata_max_age_ms (int): The period of time in milliseconds after
which we force a refresh of metadata, even if we haven't seen any
partition leadership changes to proactively discover any new
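A construction sketch for the two options documented above; broker, topic, and group id are placeholder assumptions.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'transactions-topic',                # placeholder topic
    bootstrap_servers='localhost:9092',  # placeholder broker
    group_id='example-group',
    isolation_level='read_committed',    # skip records from aborted transactions
    allow_auto_create_topics=False,      # do not auto-create the topic on metadata requests
)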
@@ -195,10 +204,17 @@ class KafkaConsumer(six.Iterator):
or other configuration forbids use of all the specified ciphers),
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
api_version (tuple): Specify which Kafka API version to use. If set to
None, the client will attempt to infer the broker version by probing
various APIs. Different versions enable different functionality.
None, the client will attempt to determine the broker version via
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
various known APIs. Dynamic version checking is performed eagerly
during __init__ and can raise NoBrokersAvailableError if no connection
was made before timeout (see api_version_auto_timeout_ms below).
Different versions enable different functionality.
Examples:
(3, 9) most recent broker release, enable all supported features
(0, 11) enables message format v2 (internal)
(0, 10, 0) enables sasl authentication and message format v1
(0, 9) enables full group coordination features with automatic
partition assignment and rebalancing,
(0, 8, 2) enables kafka-storage offset commits with manual
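A sketch of pinning api_version to avoid the eager version probe described above; the version tuple and broker address are assumptions about your deployment.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    bootstrap_servers='localhost:9092',  # placeholder broker
    api_version=(3, 9),                  # match your actual broker release
    api_version_auto_timeout_ms=2000,    # only consulted when api_version=None
)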
@@ -212,6 +228,7 @@ class KafkaConsumer(six.Iterator):
api_version_auto_timeout_ms (int): number of milliseconds to throw a
timeout exception from the constructor when checking the broker
api version. Only applies if api_version set to None.
Default: 2000
connections_max_idle_ms: Close idle connections after the number of
milliseconds specified by this config. The broker closes idle
connections after connections.max.idle.ms, so this avoids hitting
@@ -220,6 +237,7 @@ class KafkaConsumer(six.Iterator):
metric_reporters (list): A list of classes to use as metrics reporters.
Implementing the AbstractMetricsReporter interface allows plugging
in classes that will be notified of new metric creation. Default: []
metrics_enabled (bool): Whether to track metrics on this instance. Default: True.
metrics_num_samples (int): The number of samples maintained to compute
metrics. Default: 2
metrics_sample_window_ms (int): The maximum age in milliseconds of
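A sketch of the new metrics_enabled switch; the broker address is a placeholder, and the comment refers to the metrics() change later in this diff.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    bootstrap_servers='localhost:9092',  # placeholder broker
    metrics_enabled=False,               # skip Metrics() setup entirely
)
print(consumer.metrics())  # returns None when metrics are disabled (see metrics() below)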
@@ -238,12 +256,17 @@ class KafkaConsumer(six.Iterator):
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
sasl_kerberos_domain_name are ignored. Default: None.
sasl_kerberos_service_name (str): Service name to include in GSSAPI
sasl mechanism handshake. Default: 'kafka'
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
sasl mechanism handshake. Default: one of bootstrap servers
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
instance. (See kafka.oauth.abstract). Default: None
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
token provider instance. Default: None
socks5_proxy (str): Socks5 proxy URL. Default: None
kafka_client (callable): Custom class / callable for creating KafkaClient instances
Note:
Configuration parameters are described in more detail at
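A hedged SASL/PLAIN configuration sketch for the credentials options documented above; the listener address, credentials, and topic are placeholders, and security_protocol is an existing option not shown in this hunk.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'secure-topic',                            # placeholder topic
    bootstrap_servers='broker.internal:9094',  # placeholder SASL listener
    security_protocol='SASL_SSL',
    sasl_mechanism='PLAIN',
    sasl_plain_username='alice',               # placeholder credentials
    sasl_plain_password='secret',
)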
@@ -255,6 +278,7 @@ class KafkaConsumer(six.Iterator):
'group_id': None,
'key_deserializer': None,
'value_deserializer': None,
'enable_incremental_fetch_sessions': True,
'fetch_max_wait_ms': 500,
'fetch_min_bytes': 1,
'fetch_max_bytes': 52428800,
@@ -262,13 +286,15 @@ class KafkaConsumer(six.Iterator):
'request_timeout_ms': 305000, # chosen to be higher than the default of max_poll_interval_ms
'retry_backoff_ms': 100,
'reconnect_backoff_ms': 50,
'reconnect_backoff_max_ms': 1000,
'reconnect_backoff_max_ms': 30000,
'max_in_flight_requests_per_connection': 5,
'auto_offset_reset': 'latest',
'enable_auto_commit': True,
'auto_commit_interval_ms': 5000,
'default_offset_commit_callback': lambda offsets, response: True,
'check_crcs': True,
'isolation_level': 'read_uncommitted',
'allow_auto_create_topics': True,
'metadata_max_age_ms': 5 * 60 * 1000,
'partition_assignment_strategy': (RangePartitionAssignor, RoundRobinPartitionAssignor),
'max_poll_records': 500,
@@ -294,6 +320,7 @@ class KafkaConsumer(six.Iterator):
'api_version_auto_timeout_ms': 2000,
'connections_max_idle_ms': 9 * 60 * 1000,
'metric_reporters': [],
'metrics_enabled': True,
'metrics_num_samples': 2,
'metrics_sample_window_ms': 30000,
'metric_group_prefix': 'consumer',
@@ -302,10 +329,12 @@ class KafkaConsumer(six.Iterator):
'sasl_mechanism': None,
'sasl_plain_username': None,
'sasl_plain_password': None,
'sasl_kerberos_name': None,
'sasl_kerberos_service_name': 'kafka',
'sasl_kerberos_domain_name': None,
'sasl_oauth_token_provider': None,
'legacy_iterator': False, # enable to revert to < 1.4.7 iterator
'socks5_proxy': None,
'kafka_client': KafkaClient,
}
DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000
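The new kafka_client entry above accepts any callable with a KafkaClient-compatible signature; a minimal, hypothetical subclass sketch (the class name is an assumption).

from kafka import KafkaConsumer
from kafka.client_async import KafkaClient

class InstrumentedKafkaClient(KafkaClient):
    """Hypothetical subclass; any KafkaClient-compatible callable works."""
    pass

consumer = KafkaConsumer(
    bootstrap_servers='localhost:9092',    # placeholder broker
    kafka_client=InstrumentedKafkaClient,  # invoked as kafka_client(metrics=..., **config)
)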
@@ -335,13 +364,15 @@ class KafkaConsumer(six.Iterator):
"fetch_max_wait_ms ({})."
.format(connections_max_idle_ms, request_timeout_ms, fetch_max_wait_ms))
metrics_tags = {'client-id': self.config['client_id']}
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
time_window_ms=self.config['metrics_sample_window_ms'],
tags=metrics_tags)
reporters = [reporter() for reporter in self.config['metric_reporters']]
self._metrics = Metrics(metric_config, reporters)
# TODO _metrics likely needs to be passed to KafkaClient, etc.
if self.config['metrics_enabled']:
metrics_tags = {'client-id': self.config['client_id']}
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
time_window_ms=self.config['metrics_sample_window_ms'],
tags=metrics_tags)
reporters = [reporter() for reporter in self.config['metric_reporters']]
self._metrics = Metrics(metric_config, reporters)
else:
self._metrics = None
# api_version was previously a str. Accept old format for now
if isinstance(self.config['api_version'], str):
@@ -353,11 +384,10 @@ class KafkaConsumer(six.Iterator):
log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
str(self.config['api_version']), str_version)
self._client = KafkaClient(metrics=self._metrics, **self.config)
self._client = self.config['kafka_client'](metrics=self._metrics, **self.config)
# Get auto-discovered version from client if necessary
if self.config['api_version'] is None:
self.config['api_version'] = self._client.config['api_version']
# Get auto-discovered / normalized version from client
self.config['api_version'] = self._client.config['api_version']
# Coordinator configurations are different for older brokers
# max_poll_interval_ms is not supported directly -- it must be
@@ -380,9 +410,9 @@ class KafkaConsumer(six.Iterator):
self._subscription = SubscriptionState(self.config['auto_offset_reset'])
self._fetcher = Fetcher(
self._client, self._subscription, self._metrics, **self.config)
self._client, self._subscription, metrics=self._metrics, **self.config)
self._coordinator = ConsumerCoordinator(
self._client, self._subscription, self._metrics,
self._client, self._subscription, metrics=self._metrics,
assignors=self.config['partition_assignment_strategy'],
**self.config)
self._closed = False
@@ -422,8 +452,15 @@ class KafkaConsumer(six.Iterator):
no rebalance operation triggered when group membership or cluster
and topic metadata change.
"""
self._subscription.assign_from_user(partitions)
self._client.set_topics([tp.topic for tp in partitions])
if not partitions:
self.unsubscribe()
else:
# make sure the offsets of topic partitions the consumer is unsubscribing from
# are committed since there will be no following rebalance
self._coordinator.maybe_auto_commit_offsets_now()
self._subscription.assign_from_user(partitions)
self._client.set_topics([tp.topic for tp in partitions])
log.debug("Subscribed to partition(s): %s", partitions)
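Since assign() now treats an empty collection as unsubscribe(), a short sketch with placeholder broker and topic names.

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
consumer.assign([TopicPartition('example-topic', 0)])         # manual assignment
# ... consume ...
consumer.assign([])  # empty assignment now routes through unsubscribe()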
def assignment(self):
"""Get the TopicPartitions currently assigned to this consumer.
@@ -441,20 +478,23 @@ class KafkaConsumer(six.Iterator):
"""
return self._subscription.assigned_partitions()
def close(self, autocommit=True):
def close(self, autocommit=True, timeout_ms=None):
"""Close the consumer, waiting indefinitely for any needed cleanup.
Keyword Arguments:
autocommit (bool): If auto-commit is configured for this consumer,
this optional flag causes the consumer to attempt to commit any
pending consumed offsets prior to close. Default: True
timeout_ms (num, optional): Milliseconds to wait for auto-commit.
Default: None
"""
if self._closed:
return
log.debug("Closing the KafkaConsumer.")
self._closed = True
self._coordinator.close(autocommit=autocommit)
self._metrics.close()
self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms)
if self._metrics:
self._metrics.close()
self._client.close()
try:
self.config['key_deserializer'].close()
@@ -500,7 +540,7 @@ class KafkaConsumer(six.Iterator):
offsets, callback=callback)
return future
def commit(self, offsets=None):
def commit(self, offsets=None, timeout_ms=None):
"""Commit offsets to kafka, blocking until success or error.
This commits offsets only to Kafka. The offsets committed using this API
@@ -524,17 +564,16 @@ class KafkaConsumer(six.Iterator):
assert self.config['group_id'] is not None, 'Requires group_id'
if offsets is None:
offsets = self._subscription.all_consumed_offsets()
self._coordinator.commit_offsets_sync(offsets)
self._coordinator.commit_offsets_sync(offsets, timeout_ms=timeout_ms)
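A bounded synchronous commit sketch using the new timeout_ms argument; the three-field OffsetAndMetadata form mirrors the one used later in this diff, and broker, topic, and offset values are placeholders.

from kafka import KafkaConsumer, TopicPartition
from kafka.structs import OffsetAndMetadata

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',  # placeholder broker
                         group_id='example-group')
tp = TopicPartition('example-topic', 0)
consumer.assign([tp])
# Commit an explicit offset, waiting at most 5 seconds for the broker.
consumer.commit({tp: OffsetAndMetadata(42, '', -1)}, timeout_ms=5000)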
def committed(self, partition, metadata=False):
def committed(self, partition, metadata=False, timeout_ms=None):
"""Get the last committed offset for the given partition.
This offset will be used as the position for the consumer
in the event of a failure.
This call may block to do a remote call if the partition in question
isn't assigned to this consumer or if the consumer hasn't yet
initialized its cache of committed offsets.
This call will block to do a remote call to get the latest committed
offsets from the server.
Arguments:
partition (TopicPartition): The partition to check.
@@ -543,28 +582,19 @@ class KafkaConsumer(six.Iterator):
Returns:
The last committed offset (int or OffsetAndMetadata), or None if there was no prior commit.
Raises:
KafkaTimeoutError if timeout_ms is provided and the request times out.
BrokerResponseErrors if OffsetFetchRequest raises an error.
"""
assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1'
assert self.config['group_id'] is not None, 'Requires group_id'
if not isinstance(partition, TopicPartition):
raise TypeError('partition must be a TopicPartition namedtuple')
if self._subscription.is_assigned(partition):
committed = self._subscription.assignment[partition].committed
if committed is None:
self._coordinator.refresh_committed_offsets_if_needed()
committed = self._subscription.assignment[partition].committed
else:
commit_map = self._coordinator.fetch_committed_offsets([partition])
if partition in commit_map:
committed = commit_map[partition]
else:
committed = None
if committed is not None:
if metadata:
return committed
else:
return committed.offset
committed = self._coordinator.fetch_committed_offsets([partition], timeout_ms=timeout_ms)
if partition not in committed:
return None
return committed[partition] if metadata else committed[partition].offset
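committed() now always performs the coordinator round trip and accepts timeout_ms; a hedged sketch with placeholder broker and topic names.

from kafka import KafkaConsumer, TopicPartition
from kafka.errors import KafkaTimeoutError

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',  # placeholder broker
                         group_id='example-group')
tp = TopicPartition('example-topic', 0)
try:
    offset = consumer.committed(tp, timeout_ms=5000)               # int or None
    full = consumer.committed(tp, metadata=True, timeout_ms=5000)  # OffsetAndMetadata or None
except KafkaTimeoutError:
    offset = full = None  # coordinator lookup did not finish within 5s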
def _fetch_all_topic_metadata(self):
"""A blocking call that fetches topic metadata for all topics in the
@@ -609,7 +639,7 @@ class KafkaConsumer(six.Iterator):
if partitions is None:
self._fetch_all_topic_metadata()
partitions = cluster.partitions_for_topic(topic)
return partitions
return partitions or set()
def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
"""Fetch data from assigned topics / partitions.
@@ -649,82 +679,88 @@ class KafkaConsumer(six.Iterator):
assert not self._closed, 'KafkaConsumer is closed'
# Poll for new data until the timeout expires
start = time.time()
remaining = timeout_ms
while True:
records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
timer = Timer(timeout_ms)
while not self._closed:
records = self._poll_once(timer, max_records, update_offsets=update_offsets)
if records:
return records
elif timer.expired:
break
return {}
elapsed_ms = (time.time() - start) * 1000
remaining = timeout_ms - elapsed_ms
if remaining <= 0:
return {}
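A typical poll loop against the refactored poll(), paired with the timeout-bounded close() introduced later in this diff; broker, topic, and group id are placeholders.

from kafka import KafkaConsumer

consumer = KafkaConsumer('example-topic',                     # placeholder topic
                         bootstrap_servers='localhost:9092',  # placeholder broker
                         group_id='example-group',
                         enable_auto_commit=True)
try:
    while True:
        batch = consumer.poll(timeout_ms=1000, max_records=500)
        for tp, records in batch.items():
            for record in records:
                pass  # process record.value here
finally:
    consumer.close(autocommit=True, timeout_ms=10000)  # bounded auto-commit on close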
def _poll_once(self, timeout_ms, max_records, update_offsets=True):
def _poll_once(self, timer, max_records, update_offsets=True):
"""Do one round of polling. In addition to checking for new data, this does
any needed heart-beating, auto-commits, and offset updates.
Arguments:
timeout_ms (int): The maximum time in milliseconds to block.
timer (Timer): The maximum time in milliseconds to block.
Returns:
dict: Map of topic to list of records (may be empty).
"""
self._coordinator.poll()
if not self._coordinator.poll(timeout_ms=timer.timeout_ms):
log.debug('poll: timeout during coordinator.poll(); returning early')
return {}
# Fetch positions if we have partitions we're subscribed to that we
# don't know the offset for
if not self._subscription.has_all_fetch_positions():
self._update_fetch_positions(self._subscription.missing_fetch_positions())
has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms)
# If data is available already, e.g. from a previous network client
# poll() call to commit, then just return it immediately
records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
log.debug('poll: fetched records: %s, %s', records, partial)
# Before returning the fetched records, we can send off the
# next round of fetches and avoid block waiting for their
# responses to enable pipelining while the user is handling the
# fetched records.
if not partial:
log.debug("poll: Sending fetches")
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)
if records:
# Before returning the fetched records, we can send off the
# next round of fetches and avoid block waiting for their
# responses to enable pipelining while the user is handling the
# fetched records.
if not partial:
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)
return records
# Send any new fetches (won't resend pending fetches)
futures = self._fetcher.send_fetches()
if len(futures):
self._client.poll(timeout_ms=0)
# We do not want to be stuck blocking in poll if we are missing some positions
# since the offset lookup may be backing off after a failure
poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000)
if not has_all_fetch_positions:
log.debug('poll: do not have all fetch positions...')
poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])
timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000)
self._client.poll(timeout_ms=timeout_ms)
self._client.poll(timeout_ms=poll_timeout_ms)
# after the long poll, we should check whether the group needs to rebalance
# prior to returning data so that the group can stabilize faster
if self._coordinator.need_rejoin():
log.debug('poll: coordinator needs rejoin; returning early')
return {}
records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
return records
def position(self, partition):
def position(self, partition, timeout_ms=None):
"""Get the offset of the next record that will be fetched
Arguments:
partition (TopicPartition): Partition to check
Returns:
int: Offset
int: Offset or None
"""
if not isinstance(partition, TopicPartition):
raise TypeError('partition must be a TopicPartition namedtuple')
assert self._subscription.is_assigned(partition), 'Partition is not assigned'
offset = self._subscription.assignment[partition].position
if offset is None:
self._update_fetch_positions([partition])
offset = self._subscription.assignment[partition].position
return offset
timer = Timer(timeout_ms)
position = self._subscription.assignment[partition].position
while position is None:
# batch update fetch positions for any partitions without a valid position
if self._update_fetch_positions(timeout_ms=timer.timeout_ms):
position = self._subscription.assignment[partition].position
elif timer.expired:
return None
else:
return position.offset
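Because position() can now return None when the timer expires, callers should handle that case; a sketch with placeholder broker and topic names.

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',  # placeholder broker
                         group_id='example-group')
tp = TopicPartition('example-topic', 0)
consumer.assign([tp])
pos = consumer.position(tp, timeout_ms=3000)
if pos is None:
    pass  # no valid position could be established before the timeout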
def highwater(self, partition):
"""Last known highwater offset for a partition.
@@ -818,8 +854,7 @@ class KafkaConsumer(six.Iterator):
assert partition in self._subscription.assigned_partitions(), 'Unassigned partition'
log.debug("Seeking to offset %s for partition %s", offset, partition)
self._subscription.assignment[partition].seek(offset)
if not self.config['legacy_iterator']:
self._iterator = None
self._iterator = None
def seek_to_beginning(self, *partitions):
"""Seek to the oldest available offset for partitions.
@@ -843,9 +878,8 @@ class KafkaConsumer(six.Iterator):
for tp in partitions:
log.debug("Seeking to beginning of partition %s", tp)
self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST)
if not self.config['legacy_iterator']:
self._iterator = None
self._subscription.request_offset_reset(tp, OffsetResetStrategy.EARLIEST)
self._iterator = None
def seek_to_end(self, *partitions):
"""Seek to the most recent available offset for partitions.
@@ -869,9 +903,8 @@ class KafkaConsumer(six.Iterator):
for tp in partitions:
log.debug("Seeking to end of partition %s", tp)
self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST)
if not self.config['legacy_iterator']:
self._iterator = None
self._subscription.request_offset_reset(tp, OffsetResetStrategy.LATEST)
self._iterator = None
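The seek helpers now go through request_offset_reset(); the public usage is unchanged, sketched here with placeholder names.

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
tp = TopicPartition('example-topic', 0)
consumer.assign([tp])
consumer.seek_to_beginning(tp)  # replay from the oldest available offset
consumer.seek_to_end(tp)        # or jump to the newest data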
def subscribe(self, topics=(), pattern=None, listener=None):
"""Subscribe to a list of topics, or a topic regex pattern.
@@ -942,13 +975,16 @@ class KafkaConsumer(six.Iterator):
def unsubscribe(self):
"""Unsubscribe from all topics and clear all assigned partitions."""
# make sure the offsets of topic partitions the consumer is unsubscribing from
# are committed since there will be no following rebalance
self._coordinator.maybe_auto_commit_offsets_now()
self._subscription.unsubscribe()
self._coordinator.close()
if self.config['api_version'] >= (0, 9):
self._coordinator.maybe_leave_group()
self._client.cluster.need_all_topic_metadata = False
self._client.set_topics([])
log.debug("Unsubscribed all topics or patterns and assigned partitions")
if not self.config['legacy_iterator']:
self._iterator = None
self._iterator = None
def metrics(self, raw=False):
"""Get metrics on consumer performance.
@@ -960,6 +996,8 @@ class KafkaConsumer(six.Iterator):
This is an unstable interface. It may change in future
releases without warning.
"""
if not self._metrics:
return
if raw:
return self._metrics.metrics.copy()
@@ -1015,7 +1053,7 @@ class KafkaConsumer(six.Iterator):
raise ValueError(
"The target time for partition {} is {}. The target time "
"cannot be negative.".format(tp, ts))
return self._fetcher.get_offsets_by_times(
return self._fetcher.offsets_by_times(
timestamps, self.config['request_timeout_ms'])
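A hedged offsets_for_times() sketch for the renamed fetcher call above; broker and topic are placeholders.

import time
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
tp = TopicPartition('example-topic', 0)
one_hour_ago_ms = int(time.time() * 1000) - 60 * 60 * 1000
offsets = consumer.offsets_for_times({tp: one_hour_ago_ms})
# offsets[tp] is an OffsetAndTimestamp, or None if no offset exists at/after that time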
def beginning_offsets(self, partitions):
@@ -1081,7 +1119,7 @@ class KafkaConsumer(six.Iterator):
return False
return True
def _update_fetch_positions(self, partitions):
def _update_fetch_positions(self, timeout_ms=None):
"""Set the fetch position to the committed position (if there is one)
or reset it using the offset reset policy the user has configured.
@@ -1089,30 +1127,36 @@ class KafkaConsumer(six.Iterator):
partitions (List[TopicPartition]): The partitions that need
updating fetch positions.
Returns True if fetch positions updated, False if timeout or async reset is pending
Raises:
NoOffsetForPartitionError: If no offset is stored for a given
partition and no offset reset policy is defined.
"""
# Lookup any positions for partitions which are awaiting reset (which may be the
# case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`). We do
# this check first to avoid an unnecessary lookup of committed offsets (which
# typically occurs when the user is manually assigning partitions and managing
# their own offsets).
self._fetcher.reset_offsets_if_needed(partitions)
if self._subscription.has_all_fetch_positions():
return True
if not self._subscription.has_all_fetch_positions():
# if we still don't have offsets for all partitions, then we should either seek
# to the last committed position or reset using the auto reset policy
if (self.config['api_version'] >= (0, 8, 1) and
self.config['group_id'] is not None):
# first refresh commits for all assigned partitions
self._coordinator.refresh_committed_offsets_if_needed()
if (self.config['api_version'] >= (0, 8, 1) and
self.config['group_id'] is not None):
# If there are any partitions which do not have a valid position and are not
# awaiting reset, then we need to fetch committed offsets. We will only do a
# coordinator lookup if there are partitions which have missing positions, so
# a consumer with manually assigned partitions can avoid a coordinator dependence
# by always ensuring that assigned partitions have an initial position.
if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms):
return False
# Then, do any offset lookups in case some positions are not known
self._fetcher.update_fetch_positions(partitions)
# If there are partitions still needing a position and a reset policy is defined,
# request reset using the default policy. If no reset strategy is defined and there
# are partitions with a missing position, then we will raise an exception.
self._subscription.reset_missing_positions()
# Finally send an asynchronous request to lookup and update the positions of any
# partitions which are awaiting reset.
return not self._fetcher.reset_offsets_if_needed()
def _message_generator_v2(self):
timeout_ms = 1000 * (self._consumer_timeout - time.time())
timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False)
for tp, records in six.iteritems(record_map):
# Generators are stateful, and it is possible that the tp / records
@@ -1127,72 +1171,15 @@ class KafkaConsumer(six.Iterator):
log.debug("Not returning fetched records for partition %s"
" since it is no longer fetchable", tp)
break
self._subscription.assignment[tp].position = record.offset + 1
self._subscription.assignment[tp].position = OffsetAndMetadata(record.offset + 1, '', -1)
yield record
def _message_generator(self):
assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment'
while time.time() < self._consumer_timeout:
self._coordinator.poll()
# Fetch offsets for any subscribed partitions that we aren't tracking yet
if not self._subscription.has_all_fetch_positions():
partitions = self._subscription.missing_fetch_positions()
self._update_fetch_positions(partitions)
poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms'])
self._client.poll(timeout_ms=poll_ms)
# after the long poll, we should check whether the group needs to rebalance
# prior to returning data so that the group can stabilize faster
if self._coordinator.need_rejoin():
continue
# We need to make sure we at least keep up with scheduled tasks,
# like heartbeats, auto-commits, and metadata refreshes
timeout_at = self._next_timeout()
# Short-circuit the fetch iterator if we are already timed out
# to avoid any unintentional interaction with fetcher setup
if time.time() > timeout_at:
continue
for msg in self._fetcher:
yield msg
if time.time() > timeout_at:
log.debug("internal iterator timeout - breaking for poll")
break
self._client.poll(timeout_ms=0)
# An else block on a for loop only executes if there was no break
# so this should only be called on a StopIteration from the fetcher
# We assume that it is safe to init_fetches when fetcher is done
# i.e., there are no more records stored internally
else:
self._fetcher.send_fetches()
def _next_timeout(self):
timeout = min(self._consumer_timeout,
self._client.cluster.ttl() / 1000.0 + time.time(),
self._coordinator.time_to_next_poll() + time.time())
return timeout
def __iter__(self): # pylint: disable=non-iterator-returned
return self
def __next__(self):
if self._closed:
raise StopIteration('KafkaConsumer closed')
# Now that the heartbeat thread runs in the background
# there should be no reason to maintain a separate iterator
# but we'll keep it available for a few releases just in case
if self.config['legacy_iterator']:
return self.next_v1()
else:
return self.next_v2()
def next_v2(self):
self._set_consumer_timeout()
while time.time() < self._consumer_timeout:
if not self._iterator:
@@ -1203,17 +1190,6 @@ class KafkaConsumer(six.Iterator):
self._iterator = None
raise StopIteration()
def next_v1(self):
if not self._iterator:
self._iterator = self._message_generator()
self._set_consumer_timeout()
try:
return next(self._iterator)
except StopIteration:
self._iterator = None
raise
def _set_consumer_timeout(self):
# consumer_timeout_ms can be used to stop iteration early
if self.config['consumer_timeout_ms'] >= 0: