API refactor

2025-10-07 16:25:52 +09:00
parent 76d0d86211
commit 91c7e04474
1171 changed files with 81940 additions and 44117 deletions


@@ -2,6 +2,7 @@ from __future__ import absolute_import, division
import collections
import copy
import heapq
import logging
import threading
import time
@@ -11,6 +12,8 @@ from kafka.vendor import six
from kafka import errors as Errors
from kafka.metrics.measurable import AnonMeasurable
from kafka.metrics.stats import Avg, Max, Rate
from kafka.producer.transaction_manager import ProducerIdAndEpoch
from kafka.protocol.init_producer_id import InitProducerIdRequest
from kafka.protocol.produce import ProduceRequest
from kafka.structs import TopicPartition
from kafka.version import __version__
@@ -27,14 +30,18 @@ class Sender(threading.Thread):
DEFAULT_CONFIG = {
'max_request_size': 1048576,
'acks': 1,
'retries': 0,
'retries': float('inf'),
'request_timeout_ms': 30000,
'retry_backoff_ms': 100,
'metrics': None,
'guarantee_message_order': False,
'transaction_manager': None,
'transactional_id': None,
'transaction_timeout_ms': 60000,
'client_id': 'kafka-python-' + __version__,
'api_version': (0, 8, 0),
}
def __init__(self, client, metadata, accumulator, metrics, **configs):
def __init__(self, client, metadata, accumulator, **configs):
super(Sender, self).__init__()
self.config = copy.copy(self.DEFAULT_CONFIG)
for key in self.config:
@@ -48,32 +55,75 @@ class Sender(threading.Thread):
self._running = True
self._force_close = False
self._topics_to_add = set()
self._sensors = SenderMetrics(metrics, self._client, self._metadata)
if self.config['metrics']:
self._sensors = SenderMetrics(self.config['metrics'], self._client, self._metadata)
else:
self._sensors = None
self._transaction_manager = self.config['transaction_manager']
# A per-partition queue of batches ordered by creation time for tracking the in-flight batches
self._in_flight_batches = collections.defaultdict(list)
def _maybe_remove_from_inflight_batches(self, batch):
try:
queue = self._in_flight_batches[batch.topic_partition]
except KeyError:
return
try:
idx = queue.index((batch.created, batch))
except ValueError:
return
# https://stackoverflow.com/questions/10162679/python-delete-element-from-heap
queue[idx] = queue[-1]
queue.pop()
heapq.heapify(queue)
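The removal above uses a standard trick for deleting an arbitrary element from a heapq-managed list: overwrite it with the last element, pop, and re-heapify. A minimal standalone sketch (not part of the diff), using hypothetical (created, name) tuples in place of the (batch.created, batch) entries:

import heapq

def remove_entry(queue, entry):
    # Locate the entry with a linear scan; a binary heap offers no index.
    try:
        idx = queue.index(entry)
    except ValueError:
        return False
    # Overwrite it with the last element, shrink the list by one, and
    # restore the heap invariant.
    queue[idx] = queue[-1]
    queue.pop()
    heapq.heapify(queue)
    return True

queue = []
for entry in [(3.0, 'c'), (1.0, 'a'), (2.0, 'b')]:
    heapq.heappush(queue, entry)
remove_entry(queue, (2.0, 'b'))
assert heapq.heappop(queue) == (1.0, 'a')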
def _get_expired_inflight_batches(self, now=None):
"""Get the in-flight batches that has reached delivery timeout."""
expired_batches = []
to_remove = []
for tp, queue in six.iteritems(self._in_flight_batches):
while queue:
_created_at, batch = queue[0]
if batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms):
heapq.heappop(queue)
if batch.final_state is None:
expired_batches.append(batch)
else:
raise Errors.IllegalStateError("%s batch created at %s gets unexpected final state %s" % (batch.topic_partition, batch.created, batch.final_state))
else:
self._accumulator.maybe_update_next_batch_expiry_time(batch)
break
else:
# Avoid mutating in_flight_batches during iteration
to_remove.append(tp)
for tp in to_remove:
del self._in_flight_batches[tp]
return expired_batches
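Because each per-partition queue is a min-heap keyed on creation time, the oldest batch sits at the front; once that batch is still within its delivery timeout, no later batch can have expired, so the scan stops early. A reduced sketch of that idea, separate from the diff and assuming expiry simply means delivery_timeout_ms has elapsed since creation:

import heapq
import time

def pop_expired(queue, delivery_timeout_ms, now=None):
    # queue holds (created, batch) tuples; drain expired entries from the
    # front and stop at the first one still within its timeout.
    now = time.time() if now is None else now
    expired = []
    while queue:
        created, batch = queue[0]
        if (now - created) * 1000 >= delivery_timeout_ms:
            expired.append(heapq.heappop(queue)[1])
        else:
            break
    return expired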
def run(self):
"""The main run loop for the sender thread."""
log.debug("Starting Kafka producer I/O thread.")
log.debug("%s: Starting Kafka producer I/O thread.", str(self))
# main loop, runs until close is called
while self._running:
try:
self.run_once()
except Exception:
log.exception("Uncaught error in kafka producer I/O thread")
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
log.debug("Beginning shutdown of Kafka producer I/O thread, sending"
" remaining records.")
log.debug("%s: Beginning shutdown of Kafka producer I/O thread, sending"
" remaining records.", str(self))
# okay we stopped accepting requests but there may still be
# requests in the accumulator or waiting for acknowledgment,
# wait until these are completed.
while (not self._force_close
and (self._accumulator.has_unsent()
and (self._accumulator.has_undrained()
or self._client.in_flight_request_count() > 0)):
try:
self.run_once()
except Exception:
log.exception("Uncaught error in kafka producer I/O thread")
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
if self._force_close:
# We need to fail all the incomplete batches and wake up the
@@ -83,38 +133,75 @@ class Sender(threading.Thread):
try:
self._client.close()
except Exception:
log.exception("Failed to close network client")
log.exception("%s: Failed to close network client", str(self))
log.debug("Shutdown of Kafka producer I/O thread has completed.")
log.debug("%s: Shutdown of Kafka producer I/O thread has completed.", str(self))
def run_once(self):
"""Run a single iteration of sending."""
while self._topics_to_add:
self._client.add_topic(self._topics_to_add.pop())
if self._transaction_manager:
try:
if not self._transaction_manager.is_transactional():
# this is an idempotent producer, so make sure we have a producer id
self._maybe_wait_for_producer_id()
elif self._transaction_manager.has_in_flight_transactional_request() or self._maybe_send_transactional_request():
# as long as there are outstanding transactional requests, we simply wait for them to return
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
return
# do not continue sending if the transaction manager is in a failed state or if there
# is no producer id (for the idempotent case).
if self._transaction_manager.has_fatal_error() or not self._transaction_manager.has_producer_id():
last_error = self._transaction_manager.last_error
if last_error is not None:
self._maybe_abort_batches(last_error)
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
return
elif self._transaction_manager.has_abortable_error():
self._accumulator.abort_undrained_batches(self._transaction_manager.last_error)
except Errors.SaslAuthenticationFailedError as e:
# This is already logged as error, but propagated here to perform any clean ups.
log.debug("%s: Authentication exception while processing transactional request: %s", str(self), e)
self._transaction_manager.authentication_failed(e)
poll_timeout_ms = self._send_producer_data()
self._client.poll(timeout_ms=poll_timeout_ms)
def _send_producer_data(self, now=None):
now = time.time() if now is None else now
# get the list of partitions with data ready to send
result = self._accumulator.ready(self._metadata)
result = self._accumulator.ready(self._metadata, now=now)
ready_nodes, next_ready_check_delay, unknown_leaders_exist = result
# if there are any partitions whose leaders are not known yet, force
# metadata update
if unknown_leaders_exist:
log.debug('Unknown leaders exist, requesting metadata update')
log.debug('%s: Unknown leaders exist, requesting metadata update', str(self))
self._metadata.request_update()
# remove any nodes we aren't ready to send to
not_ready_timeout = float('inf')
not_ready_timeout_ms = float('inf')
for node in list(ready_nodes):
if not self._client.is_ready(node):
log.debug('Node %s not ready; delaying produce of accumulated batch', node)
node_delay_ms = self._client.connection_delay(node)
log.debug('%s: Node %s not ready; delaying produce of accumulated batch (%f ms)', str(self), node, node_delay_ms)
self._client.maybe_connect(node, wakeup=False)
ready_nodes.remove(node)
not_ready_timeout = min(not_ready_timeout,
self._client.connection_delay(node))
not_ready_timeout_ms = min(not_ready_timeout_ms, node_delay_ms)
# create produce requests
batches_by_node = self._accumulator.drain(
self._metadata, ready_nodes, self.config['max_request_size'])
self._metadata, ready_nodes, self.config['max_request_size'], now=now)
for batch_list in six.itervalues(batches_by_node):
for batch in batch_list:
item = (batch.created, batch)
queue = self._in_flight_batches[batch.topic_partition]
heapq.heappush(queue, item)
if self.config['guarantee_message_order']:
# Mute all the partitions drained
@@ -122,42 +209,130 @@ class Sender(threading.Thread):
for batch in batch_list:
self._accumulator.muted.add(batch.topic_partition)
expired_batches = self._accumulator.abort_expired_batches(
self.config['request_timeout_ms'], self._metadata)
for expired_batch in expired_batches:
self._sensors.record_errors(expired_batch.topic_partition.topic, expired_batch.record_count)
self._accumulator.reset_next_batch_expiry_time()
expired_batches = self._accumulator.expired_batches(now=now)
expired_batches.extend(self._get_expired_inflight_batches(now=now))
if expired_batches:
log.debug("%s: Expired %s batches in accumulator", str(self), len(expired_batches))
# Reset the producer_id if an expired batch has previously been sent to the broker.
# See the documentation of `TransactionState.reset_producer_id` to understand why
# we need to reset the producer id here.
if self._transaction_manager and any([batch.in_retry() for batch in expired_batches]):
needs_transaction_state_reset = True
else:
needs_transaction_state_reset = False
for expired_batch in expired_batches:
error = Errors.KafkaTimeoutError(
"Expiring %d record(s) for %s: %s ms has passed since batch creation" % (
expired_batch.record_count, expired_batch.topic_partition,
int((time.time() - expired_batch.created) * 1000)))
self._fail_batch(expired_batch, error, base_offset=-1)
if self._sensors:
self._sensors.update_produce_request_metrics(batches_by_node)
if needs_transaction_state_reset:
self._transaction_manager.reset_producer_id()
return 0
self._sensors.update_produce_request_metrics(batches_by_node)
requests = self._create_produce_requests(batches_by_node)
# If we have any nodes that are ready to send + have sendable data,
# poll with 0 timeout so this can immediately loop and try sending more
# data. Otherwise, the timeout is determined by nodes that have
# partitions with data that isn't yet sendable (e.g. lingering, backing
# off). Note that this specifically does not include nodes with
# data. Otherwise, the timeout will be the smaller value between next
# batch expiry time, and the delay time for checking data availability.
# Note that the nodes may have data that isn't yet sendable due to
# lingering, backing off, etc. This specifically does not include nodes with
# sendable data that aren't ready to send since they would cause busy
# looping.
poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout)
poll_timeout_ms = min(next_ready_check_delay * 1000,
not_ready_timeout_ms,
self._accumulator.next_expiry_time_ms - now * 1000)
if poll_timeout_ms < 0:
poll_timeout_ms = 0
if ready_nodes:
log.debug("Nodes with data ready to send: %s", ready_nodes) # trace
log.debug("Created %d produce requests: %s", len(requests), requests) # trace
log.debug("%s: Nodes with data ready to send: %s", str(self), ready_nodes) # trace
log.debug("%s: Created %d produce requests: %s", str(self), len(requests), requests) # trace
# if some partitions are already ready to be sent, the select time
# would be 0; otherwise if some partition already has some data
# accumulated but not ready yet, the select time will be the time
# difference between now and its linger expiry time; otherwise the
# select time will be the time difference between now and the
# metadata expiry time
poll_timeout_ms = 0
for node_id, request in six.iteritems(requests):
batches = batches_by_node[node_id]
log.debug('Sending Produce Request: %r', request)
log.debug('%s: Sending Produce Request: %r', str(self), request)
(self._client.send(node_id, request, wakeup=False)
.add_callback(
self._handle_produce_response, node_id, time.time(), batches)
.add_errback(
self._failed_produce, batches, node_id))
return poll_timeout_ms
# if some partitions are already ready to be sent, the select time
# would be 0; otherwise if some partition already has some data
# accumulated but not ready yet, the select time will be the time
# difference between now and its linger expiry time; otherwise the
# select time will be the time difference between now and the
# metadata expiry time
self._client.poll(timeout_ms=poll_timeout_ms)
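The comments above describe how the poll timeout is chosen: the smallest of the next ready-check delay, the reconnect delay of not-ready nodes, and the time until the next batch expiry, clamped at zero, and forced to zero whenever a node already has sendable data. A hedged standalone summary with hypothetical argument names:

def compute_poll_timeout_ms(next_ready_check_delay_s, not_ready_timeout_ms,
                            next_expiry_time_ms, now_s, have_ready_nodes):
    # Smallest of the three delays described above, never negative.
    timeout_ms = min(next_ready_check_delay_s * 1000,
                     not_ready_timeout_ms,
                     next_expiry_time_ms - now_s * 1000)
    timeout_ms = max(timeout_ms, 0)
    # A node with sendable data means the loop should run again immediately.
    return 0 if have_ready_nodes else timeout_ms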
def _maybe_send_transactional_request(self):
if self._transaction_manager.is_completing() and self._accumulator.has_incomplete:
if self._transaction_manager.is_aborting():
self._accumulator.abort_undrained_batches(Errors.KafkaError("Failing batch since transaction was aborted"))
# There may still be requests left which are being retried. Since we do not know whether they had
# been successfully appended to the broker log, we must resend them until their final status is clear.
# If they had been appended and we did not receive the error, then our sequence number would no longer
# be correct which would lead to an OutOfSequenceNumberError.
if not self._accumulator.flush_in_progress():
self._accumulator.begin_flush()
next_request_handler = self._transaction_manager.next_request_handler(self._accumulator.has_incomplete)
if next_request_handler is None:
return False
log.debug("%s: Sending transactional request %s", str(self), next_request_handler.request)
while not self._force_close:
target_node = None
try:
if next_request_handler.needs_coordinator():
target_node = self._transaction_manager.coordinator(next_request_handler.coordinator_type)
if target_node is None:
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
break
elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
target_node = None
break
else:
target_node = self._client.least_loaded_node()
if target_node is not None and not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
target_node = None
if target_node is not None:
if next_request_handler.is_retry:
time.sleep(self.config['retry_backoff_ms'] / 1000)
txn_correlation_id = self._transaction_manager.next_in_flight_request_correlation_id()
future = self._client.send(target_node, next_request_handler.request)
future.add_both(next_request_handler.on_complete, txn_correlation_id)
return True
except Exception as e:
log.warn("%s: Got an exception when trying to find a node to send a transactional request to. Going to back off and retry: %s", str(self), e)
if next_request_handler.needs_coordinator():
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
break
time.sleep(self.config['retry_backoff_ms'] / 1000)
self._metadata.request_update()
if target_node is None:
self._transaction_manager.retry(next_request_handler)
return True
def _maybe_abort_batches(self, exc):
if self._accumulator.has_incomplete:
log.error("%s: Aborting producer batches due to fatal error: %s", str(self), exc)
self._accumulator.abort_batches(exc)
def initiate_close(self):
"""Start closing the sender (won't complete until all data is sent)."""
@@ -180,82 +355,164 @@ class Sender(threading.Thread):
self._topics_to_add.add(topic)
self.wakeup()
def _maybe_wait_for_producer_id(self):
while not self._transaction_manager.has_producer_id():
try:
node_id = self._client.least_loaded_node()
if node_id is None or not self._client.await_ready(node_id):
log.debug("%s, Could not find an available broker to send InitProducerIdRequest to." +
" Will back off and try again.", str(self))
time.sleep(self._client.least_loaded_node_refresh_ms() / 1000)
continue
version = self._client.api_version(InitProducerIdRequest, max_version=1)
request = InitProducerIdRequest[version](
transactional_id=self.config['transactional_id'],
transaction_timeout_ms=self.config['transaction_timeout_ms'],
)
response = self._client.send_and_receive(node_id, request)
error_type = Errors.for_code(response.error_code)
if error_type is Errors.NoError:
self._transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch))
break
elif getattr(error_type, 'retriable', False):
log.debug("%s: Retriable error from InitProducerId response: %s", str(self), error_type.__name__)
if getattr(error_type, 'invalid_metadata', False):
self._metadata.request_update()
else:
self._transaction_manager.transition_to_fatal_error(error_type())
break
except Errors.KafkaConnectionError:
log.debug("%s: Broker %s disconnected while awaiting InitProducerId response", str(self), node_id)
except Errors.RequestTimedOutError:
log.debug("%s: InitProducerId request to node %s timed out", str(self), node_id)
log.debug("%s: Retry InitProducerIdRequest in %sms.", str(self), self.config['retry_backoff_ms'])
time.sleep(self.config['retry_backoff_ms'] / 1000)
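The InitProducerId loop above follows a familiar shape: retry retriable errors with a fixed backoff, refresh metadata when the error indicates it is stale, and stop on success or the first fatal error. A generic sketch of that pattern (not kafka-python API; send_once and is_retriable are hypothetical callables):

import time

def acquire_with_retry(send_once, is_retriable, retry_backoff_ms, max_attempts=20):
    # Returns the first successful result, raises the first fatal error,
    # and gives up after max_attempts retriable failures.
    last_error = None
    for _ in range(max_attempts):
        ok, value = send_once()
        if ok:
            return value
        if not is_retriable(value):
            raise value
        last_error = value
        time.sleep(retry_backoff_ms / 1000.0)
    raise last_error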
def _failed_produce(self, batches, node_id, error):
log.debug("Error sending produce request to node %d: %s", node_id, error) # trace
log.error("%s: Error sending produce request to node %d: %s", str(self), node_id, error) # trace
for batch in batches:
self._complete_batch(batch, error, -1, None)
self._complete_batch(batch, error, -1)
def _handle_produce_response(self, node_id, send_time, batches, response):
"""Handle a produce response."""
# if we have a response, parse it
log.debug('Parsing produce response: %r', response)
log.debug('%s: Parsing produce response: %r', str(self), response)
if response:
batches_by_partition = dict([(batch.topic_partition, batch)
for batch in batches])
for topic, partitions in response.topics:
for partition_info in partitions:
global_error = None
log_start_offset = None
if response.API_VERSION < 2:
partition, error_code, offset = partition_info
ts = None
elif 2 <= response.API_VERSION <= 4:
partition, error_code, offset, ts = partition_info
elif 5 <= response.API_VERSION <= 7:
partition, error_code, offset, ts, log_start_offset = partition_info
partition, error_code, offset, ts, _log_start_offset = partition_info
else:
# the ignored parameter is record_error of type list[(batch_index: int, error_message: str)]
partition, error_code, offset, ts, log_start_offset, _, global_error = partition_info
# Currently unused / TODO: KIP-467
partition, error_code, offset, ts, _log_start_offset, _record_errors, _global_error = partition_info
tp = TopicPartition(topic, partition)
error = Errors.for_code(error_code)
batch = batches_by_partition[tp]
self._complete_batch(batch, error, offset, ts, log_start_offset, global_error)
if response.API_VERSION > 0:
self._sensors.record_throttle_time(response.throttle_time_ms, node=node_id)
self._complete_batch(batch, error, offset, timestamp_ms=ts)
else:
# this is the acks = 0 case, just complete all requests
for batch in batches:
self._complete_batch(batch, None, -1, None)
self._complete_batch(batch, None, -1)
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None, log_start_offset=None, global_error=None):
def _fail_batch(self, batch, exception, base_offset=None, timestamp_ms=None):
exception = exception if type(exception) is not type else exception()
if self._transaction_manager:
if isinstance(exception, Errors.OutOfOrderSequenceNumberError) and \
not self._transaction_manager.is_transactional() and \
self._transaction_manager.has_producer_id(batch.producer_id):
log.error("%s: The broker received an out of order sequence number for topic-partition %s"
" at offset %s. This indicates data loss on the broker, and should be investigated.",
str(self), batch.topic_partition, base_offset)
# Reset the transaction state since we have hit an irrecoverable exception and cannot make any guarantees
# about the previously committed message. Note that this will discard the producer id and sequence
# numbers for all existing partitions.
self._transaction_manager.reset_producer_id()
elif isinstance(exception, (Errors.ClusterAuthorizationFailedError,
Errors.TransactionalIdAuthorizationFailedError,
Errors.ProducerFencedError,
Errors.InvalidTxnStateError)):
self._transaction_manager.transition_to_fatal_error(exception)
elif self._transaction_manager.is_transactional():
self._transaction_manager.transition_to_abortable_error(exception)
if self._sensors:
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms, exception=exception):
self._maybe_remove_from_inflight_batches(batch)
self._accumulator.deallocate(batch)
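_fail_batch, _complete_batch, and the reset_producer_id calls above all revolve around the per-partition sequence numbers an idempotent producer tracks; resetting the producer id discards all of them, which is why an expired batch that had already been retried forces a reset. A hypothetical illustration of that bookkeeping (not the transaction manager's actual implementation):

import collections

class SequenceBookkeeping(object):
    def __init__(self):
        self._sequences = collections.defaultdict(int)

    def sequence_number(self, tp):
        # Next sequence number to assign for this topic-partition.
        return self._sequences[tp]

    def increment_sequence_number(self, tp, record_count):
        # Called after a batch completes, mirroring the diff's
        # increment_sequence_number(batch.topic_partition, batch.record_count).
        self._sequences[tp] += record_count

    def reset_producer_id(self):
        # Discard the sequence numbers for all partitions, so previously
        # sent batches can no longer be safely retried.
        self._sequences.clear()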
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None):
"""Complete or retry the given batch of records.
Arguments:
batch (RecordBatch): The record batch
batch (ProducerBatch): The record batch
error (Exception): The error (or None if none)
base_offset (int): The base offset assigned to the records if successful
timestamp_ms (int, optional): The timestamp returned by the broker for this batch
log_start_offset (int): The start offset of the log at the time this produce response was created
global_error (str): The summarising error message
"""
# Standardize no-error to None
if error is Errors.NoError:
error = None
if error is not None and self._can_retry(batch, error):
# retry
log.warning("Got error produce response on topic-partition %s,"
" retrying (%d attempts left). Error: %s",
batch.topic_partition,
self.config['retries'] - batch.attempts - 1,
global_error or error)
self._accumulator.reenqueue(batch)
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
if error is not None:
if self._can_retry(batch, error):
# retry
log.warning("%s: Got error produce response on topic-partition %s,"
" retrying (%s attempts left). Error: %s",
str(self), batch.topic_partition,
self.config['retries'] - batch.attempts - 1,
error)
# If idempotence is enabled only retry the request if the batch matches our current producer id and epoch
if not self._transaction_manager or self._transaction_manager.producer_id_and_epoch.match(batch):
log.debug("%s: Retrying batch to topic-partition %s. Sequence number: %s",
str(self), batch.topic_partition,
self._transaction_manager.sequence_number(batch.topic_partition) if self._transaction_manager else None)
self._accumulator.reenqueue(batch)
self._maybe_remove_from_inflight_batches(batch)
if self._sensors:
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
else:
log.warning("%s: Attempted to retry sending a batch but the producer id/epoch changed from %s/%s to %s/%s. This batch will be dropped",
str(self), batch.producer_id, batch.producer_epoch,
self._transaction_manager.producer_id_and_epoch.producer_id,
self._transaction_manager.producer_id_and_epoch.epoch)
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
else:
if error is Errors.TopicAuthorizationFailedError:
error = error(batch.topic_partition.topic)
# tell the user the result of their request
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
if error is Errors.UnknownTopicOrPartitionError:
log.warning("%s: Received unknown topic or partition error in produce request on partition %s."
" The topic/partition may not exist or the user may not have Describe access to it",
str(self), batch.topic_partition)
if getattr(error, 'invalid_metadata', False):
self._metadata.request_update()
else:
if error is Errors.TopicAuthorizationFailedError:
error = error(batch.topic_partition.topic)
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms):
self._maybe_remove_from_inflight_batches(batch)
self._accumulator.deallocate(batch)
# tell the user the result of their request
batch.done(base_offset, timestamp_ms, error, log_start_offset, global_error)
self._accumulator.deallocate(batch)
if error is not None:
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
if getattr(error, 'invalid_metadata', False):
self._metadata.request_update()
if self._transaction_manager and self._transaction_manager.producer_id_and_epoch.match(batch):
self._transaction_manager.increment_sequence_number(batch.topic_partition, batch.record_count)
log.debug("%s: Incremented sequence number for topic-partition %s to %s", str(self), batch.topic_partition,
self._transaction_manager.sequence_number(batch.topic_partition))
# Unmute the completed partition.
if self.config['guarantee_message_order']:
@@ -266,8 +523,10 @@ class Sender(threading.Thread):
We can retry a send if the error is transient and the number of
attempts taken is fewer than the maximum allowed
"""
return (batch.attempts < self.config['retries']
and getattr(error, 'retriable', False))
return (not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms) and
batch.attempts < self.config['retries'] and
batch.final_state is None and
getattr(error, 'retriable', False))
def _create_produce_requests(self, collated):
"""
@@ -275,23 +534,24 @@ class Sender(threading.Thread):
per-node basis.
Arguments:
collated: {node_id: [RecordBatch]}
collated: {node_id: [ProducerBatch]}
Returns:
dict: {node_id: ProduceRequest} (version depends on api_version)
dict: {node_id: ProduceRequest} (version depends on client api_versions)
"""
requests = {}
for node_id, batches in six.iteritems(collated):
requests[node_id] = self._produce_request(
node_id, self.config['acks'],
self.config['request_timeout_ms'], batches)
if batches:
requests[node_id] = self._produce_request(
node_id, self.config['acks'],
self.config['request_timeout_ms'], batches)
return requests
def _produce_request(self, node_id, acks, timeout, batches):
"""Create a produce request from the given record batches.
Returns:
ProduceRequest (version depends on api_version)
ProduceRequest (version depends on client api_versions)
"""
produce_records_by_partition = collections.defaultdict(dict)
for batch in batches:
@@ -301,32 +561,26 @@ class Sender(threading.Thread):
buf = batch.records.buffer()
produce_records_by_partition[topic][partition] = buf
kwargs = {}
if self.config['api_version'] >= (2, 1):
version = 7
elif self.config['api_version'] >= (2, 0):
version = 6
elif self.config['api_version'] >= (1, 1):
version = 5
elif self.config['api_version'] >= (1, 0):
version = 4
elif self.config['api_version'] >= (0, 11):
version = 3
kwargs = dict(transactional_id=None)
elif self.config['api_version'] >= (0, 10):
version = 2
elif self.config['api_version'] == (0, 9):
version = 1
version = self._client.api_version(ProduceRequest, max_version=7)
topic_partition_data = [
(topic, list(partition_info.items()))
for topic, partition_info in six.iteritems(produce_records_by_partition)]
transactional_id = self._transaction_manager.transactional_id if self._transaction_manager else None
if version >= 3:
return ProduceRequest[version](
transactional_id=transactional_id,
required_acks=acks,
timeout=timeout,
topics=topic_partition_data,
)
else:
version = 0
return ProduceRequest[version](
required_acks=acks,
timeout=timeout,
topics=[(topic, list(partition_info.items()))
for topic, partition_info
in six.iteritems(produce_records_by_partition)],
**kwargs
)
if transactional_id is not None:
log.warning('%s: Broker does not support ProduceRequest v3+, required for transactional_id', str(self))
return ProduceRequest[version](
required_acks=acks,
timeout=timeout,
topics=topic_partition_data,
)
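_produce_request now delegates version selection to the client's negotiated api_versions instead of mapping broker api_version tuples by hand. The underlying idea is picking the highest request version supported by both sides; a simplified sketch with hypothetical names (not the NetworkClient API):

def negotiate_version(broker_min, broker_max, client_max, client_min=0):
    # Highest version both the broker and this client support.
    version = min(client_max, broker_max)
    if version < max(client_min, broker_min):
        raise ValueError('no mutually supported API version')
    return version

# e.g. a broker supporting ProduceRequest v0-v8 and a client capped at v7
assert negotiate_version(0, 8, 7) == 7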
def wakeup(self):
"""Wake up the selector associated with this send thread."""
@@ -335,6 +589,9 @@ class Sender(threading.Thread):
def bootstrap_connected(self):
return self._client.bootstrap_connected()
def __str__(self):
return "<Sender client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id'])
class SenderMetrics(object):
@@ -367,15 +624,6 @@ class SenderMetrics(object):
sensor_name=sensor_name,
description='The maximum time in ms record batches spent in the record accumulator.')
sensor_name = 'produce-throttle-time'
self.produce_throttle_time_sensor = self.metrics.sensor(sensor_name)
self.add_metric('produce-throttle-time-avg', Avg(),
sensor_name=sensor_name,
description='The average throttle time in ms')
self.add_metric('produce-throttle-time-max', Max(),
sensor_name=sensor_name,
description='The maximum throttle time in ms')
sensor_name = 'records-per-request'
self.records_per_request_sensor = self.metrics.sensor(sensor_name)
self.add_metric('record-send-rate', Rate(),
@@ -498,8 +746,9 @@ class SenderMetrics(object):
records += batch.record_count
total_bytes += batch.records.size_in_bytes()
self.records_per_request_sensor.record(records)
self.byte_rate_sensor.record(total_bytes)
if node_batch:
self.records_per_request_sensor.record(records)
self.byte_rate_sensor.record(total_bytes)
def record_retries(self, topic, count):
self.retry_sensor.record(count)
@@ -512,6 +761,3 @@ class SenderMetrics(object):
sensor = self.metrics.get_sensor('topic.' + topic + '.record-errors')
if sensor:
sensor.record(count)
def record_throttle_time(self, throttle_time_ms, node=None):
self.produce_throttle_time_sensor.record(throttle_time_ms)