115
venv/lib/python3.12/site-packages/kafka/producer/buffer.py
Normal file
@@ -0,0 +1,115 @@
from __future__ import absolute_import, division

import collections
import io
import threading
import time

from kafka.metrics.stats import Rate

import kafka.errors as Errors


class SimpleBufferPool(object):
    """A simple pool of BytesIO objects with a weak memory ceiling."""
    def __init__(self, memory, poolable_size, metrics=None, metric_group_prefix='producer-metrics'):
        """Create a new buffer pool.

        Arguments:
            memory (int): maximum memory that this buffer pool can allocate
            poolable_size (int): memory size per buffer to cache in the free
                list rather than deallocating
        """
        self._poolable_size = poolable_size
        self._lock = threading.RLock()

        buffers = int(memory / poolable_size) if poolable_size else 0
        self._free = collections.deque([io.BytesIO() for _ in range(buffers)])

        self._waiters = collections.deque()
        self.wait_time = None
        if metrics:
            self.wait_time = metrics.sensor('bufferpool-wait-time')
            self.wait_time.add(metrics.metric_name(
                'bufferpool-wait-ratio', metric_group_prefix,
                'The fraction of time an appender waits for space allocation.'),
                Rate())

    def allocate(self, size, max_time_to_block_ms):
        """
        Allocate a buffer of the given size. This method blocks if there is not
        enough memory and the buffer pool is configured with blocking mode.

        Arguments:
            size (int): The buffer size to allocate in bytes [ignored]
            max_time_to_block_ms (int): The maximum time in milliseconds to
                block for buffer memory to be available

        Returns:
            io.BytesIO
        """
        with self._lock:
            # check if we have a free buffer of the right size pooled
            if self._free:
                return self._free.popleft()

            elif self._poolable_size == 0:
                return io.BytesIO()

            else:
                # we are out of buffers and will have to block
                buf = None
                more_memory = threading.Condition(self._lock)
                self._waiters.append(more_memory)
                # loop over and over until we have a buffer or have reserved
                # enough memory to allocate one
                while buf is None:
                    start_wait = time.time()
                    more_memory.wait(max_time_to_block_ms / 1000.0)
                    end_wait = time.time()
                    if self.wait_time:
                        self.wait_time.record(end_wait - start_wait)

                    if self._free:
                        buf = self._free.popleft()
                    else:
                        self._waiters.remove(more_memory)
                        raise Errors.KafkaTimeoutError(
                            "Failed to allocate memory within the configured"
                            " max blocking time")

                # remove the condition for this thread to let the next thread
                # in line start getting memory
                removed = self._waiters.popleft()
                assert removed is more_memory, 'Wrong condition'

                # signal any additional waiters if there is more memory left
                # over for them
                if self._free and self._waiters:
                    self._waiters[0].notify()

                # unlock and return the buffer
                return buf

    def deallocate(self, buf):
        """
        Return buffers to the pool. If they are of the poolable size add them
        to the free list, otherwise just mark the memory as free.

        Arguments:
            buffer_ (io.BytesIO): The buffer to return
        """
        with self._lock:
            # BytesIO.truncate here makes the pool somewhat pointless
            # but we stick with the BufferPool API until migrating to
            # bytesarray / memoryview. The buffer we return must not
            # expose any prior data on read().
            buf.truncate(0)
            self._free.append(buf)
            if self._waiters:
                self._waiters[0].notify()

    def queued(self):
        """The number of threads blocked waiting on memory."""
        with self._lock:
            return len(self._waiters)
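The new SimpleBufferPool above hands out pooled io.BytesIO objects and blocks callers once the memory ceiling is reached. A minimal usage sketch, not part of this commit: the sizes and the single-threaded flow are made up for illustration, and it assumes a kafka-python install that ships kafka.producer.buffer.

import io

from kafka.producer.buffer import SimpleBufferPool

# 32 KiB ceiling with 16 KiB pooled buffers -> two pre-allocated BytesIO objects
pool = SimpleBufferPool(memory=32768, poolable_size=16384)

buf = pool.allocate(size=16384, max_time_to_block_ms=1000)
assert isinstance(buf, io.BytesIO)
buf.write(b'record bytes go here')

# deallocate() truncates the buffer and notifies one blocked waiter, if any
pool.deallocate(buf)
print(pool.queued())  # 0 -- no threads are currently blocked waiting for memory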
@@ -38,7 +38,7 @@ class FutureRecordMetadata(Future):
        produce_future.add_errback(self.failure)

    def _produce_success(self, offset_and_timestamp):
        offset, produce_timestamp_ms = offset_and_timestamp
        offset, produce_timestamp_ms, log_start_offset = offset_and_timestamp

        # Unpacking from args tuple is minor speed optimization
        (relative_offset, timestamp_ms, checksum,
@@ -51,7 +51,7 @@ class FutureRecordMetadata(Future):
        if offset != -1 and relative_offset is not None:
            offset += relative_offset
        tp = self._produce_future.topic_partition
        metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms,
        metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, log_start_offset,
                                  checksum, serialized_key_size,
                                  serialized_value_size, serialized_header_size)
        self.success(metadata)
@@ -67,5 +67,5 @@ class FutureRecordMetadata(Future):


RecordMetadata = collections.namedtuple(
    'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp',
    'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', 'log_start_offset',
                       'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size'])
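The hunks above change the produce result tuple and the RecordMetadata namedtuple by a log_start_offset field. A hedged sketch of how calling code usually observes that metadata via the future returned by send(); the broker address and topic name are placeholders, not part of this commit.

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
future = producer.send('my-topic', b'payload')   # returns a FutureRecordMetadata
metadata = future.get(timeout=10)                # blocks until the broker acks

print(metadata.topic, metadata.partition, metadata.offset, metadata.timestamp)
# Whether metadata also carries log_start_offset depends on which side of this
# diff the installed kafka-python falls on.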
@@ -1,11 +1,11 @@
from __future__ import absolute_import, division
from __future__ import absolute_import

import atexit
import copy
import logging
import socket
import threading
import warnings
import time
import weakref

from kafka.vendor import six
@@ -18,12 +18,10 @@ from kafka.partitioner.default import DefaultPartitioner
from kafka.producer.future import FutureRecordMetadata, FutureProduceResult
from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator
from kafka.producer.sender import Sender
from kafka.producer.transaction_manager import TransactionManager
from kafka.record.default_records import DefaultRecordBatchBuilder
from kafka.record.legacy_records import LegacyRecordBatchBuilder
from kafka.serializer import Serializer
from kafka.structs import TopicPartition
from kafka.util import Timer, ensure_valid_topic_name


log = logging.getLogger(__name__)
@@ -36,8 +34,8 @@ class KafkaProducer(object):
|
||||
The producer is thread safe and sharing a single producer instance across
|
||||
threads will generally be faster than having multiple instances.
|
||||
|
||||
The producer consists of a RecordAccumulator which holds records that
|
||||
haven't yet been transmitted to the server, and a Sender background I/O
|
||||
The producer consists of a pool of buffer space that holds records that
|
||||
haven't yet been transmitted to the server as well as a background I/O
|
||||
thread that is responsible for turning these records into requests and
|
||||
transmitting them to the cluster.
|
||||
|
||||
@@ -73,50 +71,14 @@ class KafkaProducer(object):
|
||||
can lead to fewer, more efficient requests when not under maximal load at
|
||||
the cost of a small amount of latency.
|
||||
|
||||
The buffer_memory controls the total amount of memory available to the
|
||||
producer for buffering. If records are sent faster than they can be
|
||||
transmitted to the server then this buffer space will be exhausted. When
|
||||
the buffer space is exhausted additional send calls will block.
|
||||
|
||||
The key_serializer and value_serializer instruct how to turn the key and
|
||||
value objects the user provides into bytes.
|
||||
|
||||
From Kafka 0.11, the KafkaProducer supports two additional modes:
|
||||
the idempotent producer and the transactional producer.
|
||||
The idempotent producer strengthens Kafka's delivery semantics from
|
||||
at least once to exactly once delivery. In particular, producer retries
|
||||
will no longer introduce duplicates. The transactional producer allows an
|
||||
application to send messages to multiple partitions (and topics!)
|
||||
atomically.
|
||||
|
||||
To enable idempotence, the `enable_idempotence` configuration must be set
|
||||
to True. If set, the `retries` config will default to `float('inf')` and
|
||||
the `acks` config will default to 'all'. There are no API changes for the
|
||||
idempotent producer, so existing applications will not need to be modified
|
||||
to take advantage of this feature.
|
||||
|
||||
To take advantage of the idempotent producer, it is imperative to avoid
|
||||
application level re-sends since these cannot be de-duplicated. As such, if
|
||||
an application enables idempotence, it is recommended to leave the
|
||||
`retries` config unset, as it will be defaulted to `float('inf')`.
|
||||
Additionally, if a :meth:`~kafka.KafkaProducer.send` returns an error even
|
||||
with infinite retries (for instance if the message expires in the buffer
|
||||
before being sent), then it is recommended to shut down the producer and
|
||||
check the contents of the last produced message to ensure that it is not
|
||||
duplicated. Finally, the producer can only guarantee idempotence for
|
||||
messages sent within a single session.
|
||||
|
||||
To use the transactional producer and the attendant APIs, you must set the
|
||||
`transactional_id` configuration property. If the `transactional_id` is
|
||||
set, idempotence is automatically enabled along with the producer configs
|
||||
which idempotence depends on. Further, topics which are included in
|
||||
transactions should be configured for durability. In particular, the
|
||||
`replication.factor` should be at least `3`, and the `min.insync.replicas`
|
||||
for these topics should be set to 2. Finally, in order for transactional
|
||||
guarantees to be realized from end-to-end, the consumers must be
|
||||
configured to read only committed messages as well.
|
||||
|
||||
The purpose of the `transactional_id` is to enable transaction recovery
|
||||
across multiple sessions of a single producer instance. It would typically
|
||||
be derived from the shard identifier in a partitioned, stateful,
|
||||
application. As such, it should be unique to each producer instance running
|
||||
within a partitioned application.
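The idempotent and transactional modes described above are driven purely by configuration. A minimal sketch, not part of this commit: the broker address and transactional id are placeholders, and it assumes a kafka-python build that exposes these settings (the side of this diff whose docstring appears above).

from kafka import KafkaProducer

# Idempotent producer: retries and acks are overridden to safe defaults if unset.
idempotent_producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    enable_idempotence=True,
)

# Transactional producer: setting transactional_id implies enable_idempotence=True.
transactional_producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    transactional_id='order-processor-shard-0',
)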
|
||||
|
||||
Keyword Arguments:
|
||||
bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
|
||||
strings) that the producer should contact to bootstrap initial
|
||||
@@ -134,28 +96,6 @@ class KafkaProducer(object):
|
||||
value_serializer (callable): used to convert user-supplied message
|
||||
values to bytes. If not None, called as f(value), should return
|
||||
bytes. Default: None.
|
||||
enable_idempotence (bool): When set to True, the producer will ensure
|
||||
that exactly one copy of each message is written in the stream.
|
||||
If False, producer retries due to broker failures, etc., may write
|
||||
duplicates of the retried message in the stream. Default: False.
|
||||
|
||||
Note that enabling idempotence requires
|
||||
`max_in_flight_requests_per_connection` to be set to 1 and `retries`
|
||||
cannot be zero. Additionally, `acks` must be set to 'all'. If these
|
||||
values are left at their defaults, the producer will override the
|
||||
defaults to be suitable. If the values are set to something
|
||||
incompatible with the idempotent producer, a KafkaConfigurationError
|
||||
will be raised.
|
||||
delivery_timeout_ms (float): An upper bound on the time to report success
|
||||
or failure after producer.send() returns. This limits the total time
|
||||
that a record will be delayed prior to sending, the time to await
|
||||
acknowledgement from the broker (if expected), and the time allowed
|
||||
for retriable send failures. The producer may report failure to send
|
||||
a record earlier than this config if either an unrecoverable error is
|
||||
encountered, the retries have been exhausted, or the record is added
|
||||
to a batch which reached an earlier delivery expiration deadline.
|
||||
The value of this config should be greater than or equal to the
|
||||
sum of (request_timeout_ms + linger_ms). Default: 120000.
|
||||
acks (0, 1, 'all'): The number of acknowledgments the producer requires
|
||||
the leader to have received before considering a request complete.
|
||||
This controls the durability of records that are sent. The
|
||||
@@ -183,7 +123,7 @@ class KafkaProducer(object):
|
||||
Compression is of full batches of data, so the efficacy of batching
|
||||
will also impact the compression ratio (more batching means better
|
||||
compression). Default: None.
|
||||
retries (numeric): Setting a value greater than zero will cause the client
|
||||
retries (int): Setting a value greater than zero will cause the client
|
||||
to resend any record whose send fails with a potentially transient
|
||||
error. Note that this retry is no different than if the client
|
||||
resent the record upon receiving the error. Allowing retries
|
||||
@@ -191,12 +131,8 @@ class KafkaProducer(object):
|
||||
potentially change the ordering of records because if two batches
|
||||
are sent to a single partition, and the first fails and is retried
|
||||
but the second succeeds, then the records in the second batch may
|
||||
appear first. Note additionally that produce requests will be
|
||||
failed before the number of retries has been exhausted if the timeout
|
||||
configured by delivery_timeout_ms expires first before successful
|
||||
acknowledgement. Users should generally prefer to leave this config
|
||||
unset and instead use delivery_timeout_ms to control retry behavior.
|
||||
Default: float('inf') (infinite)
|
||||
appear first.
|
||||
Default: 0.
|
||||
batch_size (int): Requests sent to brokers will contain multiple
|
||||
batches, one for each partition with data available to be sent.
|
||||
A small batch size will make batching less common and may reduce
|
||||
@@ -229,6 +165,12 @@ class KafkaProducer(object):
|
||||
messages with the same key are assigned to the same partition.
|
||||
When a key is None, the message is delivered to a random partition
|
||||
(filtered to partitions with available leaders only, if possible).
|
||||
buffer_memory (int): The total bytes of memory the producer should use
|
||||
to buffer records waiting to be sent to the server. If records are
|
||||
sent faster than they can be delivered to the server the producer
|
||||
will block up to max_block_ms, raising an exception on timeout.
|
||||
In the current implementation, this setting is an approximation.
|
||||
Default: 33554432 (32MB)
|
||||
connections_max_idle_ms: Close idle connections after the number of
|
||||
milliseconds specified by this config. The broker closes idle
|
||||
connections after connections.max.idle.ms, so this avoids hitting
|
||||
@@ -246,9 +188,6 @@ class KafkaProducer(object):
|
||||
This setting will limit the number of record batches the producer
|
||||
will send in a single request to avoid sending huge requests.
|
||||
Default: 1048576.
|
||||
allow_auto_create_topics (bool): Enable/disable auto topic creation
|
||||
on metadata request. Only available with api_version >= (0, 11).
|
||||
Default: True
|
||||
metadata_max_age_ms (int): The period of time in milliseconds after
|
||||
which we force a refresh of metadata even if we haven't seen any
|
||||
partition leadership changes to proactively discover any new
|
||||
@@ -277,7 +216,7 @@ class KafkaProducer(object):
|
||||
reconnection attempts will continue periodically with this fixed
|
||||
rate. To avoid connection storms, a randomization factor of 0.2
|
||||
will be applied to the backoff resulting in a random range between
|
||||
20% below and 20% above the computed value. Default: 30000.
|
||||
20% below and 20% above the computed value. Default: 1000.
|
||||
max_in_flight_requests_per_connection (int): Requests are pipelined
|
||||
to kafka brokers up to this number of maximum requests per
|
||||
broker connection. Note that if this setting is set to be greater
|
||||
@@ -294,7 +233,7 @@ class KafkaProducer(object):
|
||||
should verify that the certificate matches the brokers hostname.
|
||||
default: true.
|
||||
ssl_cafile (str): optional filename of ca file to use in certificate
|
||||
verification. default: none.
|
||||
veriication. default: none.
|
||||
ssl_certfile (str): optional filename of file in pem format containing
|
||||
the client certificate, as well as any ca certificates needed to
|
||||
establish the certificate's authenticity. default: none.
|
||||
@@ -313,28 +252,14 @@ class KafkaProducer(object):
|
||||
or other configuration forbids use of all the specified ciphers),
|
||||
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
|
||||
api_version (tuple): Specify which Kafka API version to use. If set to
|
||||
None, the client will attempt to determine the broker version via
|
||||
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
|
||||
various known APIs. Dynamic version checking is performed eagerly
|
||||
during __init__ and can raise NoBrokersAvailableError if no connection
|
||||
was made before timeout (see api_version_auto_timeout_ms below).
|
||||
Different versions enable different functionality.
|
||||
|
||||
Examples:
|
||||
(3, 9) most recent broker release, enable all supported features
|
||||
(0, 11) enables message format v2 (internal)
|
||||
(0, 10, 0) enables sasl authentication and message format v1
|
||||
(0, 8, 0) enables basic functionality only
|
||||
|
||||
Default: None
|
||||
None, the client will attempt to infer the broker version by probing
|
||||
various APIs. Example: (0, 10, 2). Default: None
|
||||
api_version_auto_timeout_ms (int): number of milliseconds to throw a
|
||||
timeout exception from the constructor when checking the broker
|
||||
api version. Only applies if api_version set to None.
|
||||
Default: 2000
|
||||
metric_reporters (list): A list of classes to use as metrics reporters.
|
||||
Implementing the AbstractMetricsReporter interface allows plugging
|
||||
in classes that will be notified of new metric creation. Default: []
|
||||
metrics_enabled (bool): Whether to track metrics on this instance. Default True.
|
||||
metrics_num_samples (int): The number of samples maintained to compute
|
||||
metrics. Default: 2
|
||||
metrics_sample_window_ms (int): The maximum age in milliseconds of
|
||||
@@ -349,42 +274,33 @@ class KafkaProducer(object):
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
|
||||
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
|
||||
sasl_kerberos_domain name are ignored. Default: None.
|
||||
sasl_kerberos_service_name (str): Service name to include in GSSAPI
|
||||
sasl mechanism handshake. Default: 'kafka'
|
||||
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
|
||||
sasl mechanism handshake. Default: one of bootstrap servers
|
||||
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
|
||||
token provider instance. Default: None
|
||||
socks5_proxy (str): Socks5 proxy URL. Default: None
|
||||
kafka_client (callable): Custom class / callable for creating KafkaClient instances
|
||||
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
|
||||
instance. (See kafka.oauth.abstract). Default: None
|
||||
|
||||
Note:
|
||||
Configuration parameters are described in more detail at
|
||||
https://kafka.apache.org/0100/documentation/#producerconfigs
|
||||
https://kafka.apache.org/0100/configuration.html#producerconfigs
|
||||
"""
|
||||
DEFAULT_CONFIG = {
|
||||
'bootstrap_servers': 'localhost',
|
||||
'client_id': None,
|
||||
'key_serializer': None,
|
||||
'value_serializer': None,
|
||||
'enable_idempotence': False,
|
||||
'transactional_id': None,
|
||||
'transaction_timeout_ms': 60000,
|
||||
'delivery_timeout_ms': 120000,
|
||||
'acks': 1,
|
||||
'bootstrap_topics_filter': set(),
|
||||
'compression_type': None,
|
||||
'retries': float('inf'),
|
||||
'retries': 0,
|
||||
'batch_size': 16384,
|
||||
'linger_ms': 0,
|
||||
'partitioner': DefaultPartitioner(),
|
||||
'buffer_memory': 33554432,
|
||||
'connections_max_idle_ms': 9 * 60 * 1000,
|
||||
'max_block_ms': 60000,
|
||||
'max_request_size': 1048576,
|
||||
'allow_auto_create_topics': True,
|
||||
'metadata_max_age_ms': 300000,
|
||||
'retry_backoff_ms': 100,
|
||||
'request_timeout_ms': 30000,
|
||||
@@ -394,7 +310,7 @@ class KafkaProducer(object):
|
||||
'sock_chunk_bytes': 4096, # undocumented experimental option
|
||||
'sock_chunk_buffer_count': 1000, # undocumented experimental option
|
||||
'reconnect_backoff_ms': 50,
|
||||
'reconnect_backoff_max_ms': 30000,
|
||||
'reconnect_backoff_max_ms': 1000,
|
||||
'max_in_flight_requests_per_connection': 5,
|
||||
'security_protocol': 'PLAINTEXT',
|
||||
'ssl_context': None,
|
||||
@@ -408,23 +324,17 @@ class KafkaProducer(object):
|
||||
'api_version': None,
|
||||
'api_version_auto_timeout_ms': 2000,
|
||||
'metric_reporters': [],
|
||||
'metrics_enabled': True,
|
||||
'metrics_num_samples': 2,
|
||||
'metrics_sample_window_ms': 30000,
|
||||
'selector': selectors.DefaultSelector,
|
||||
'sasl_mechanism': None,
|
||||
'sasl_plain_username': None,
|
||||
'sasl_plain_password': None,
|
||||
'sasl_kerberos_name': None,
|
||||
'sasl_kerberos_service_name': 'kafka',
|
||||
'sasl_kerberos_domain_name': None,
|
||||
'sasl_oauth_token_provider': None,
|
||||
'socks5_proxy': None,
|
||||
'kafka_client': KafkaClient,
|
||||
'sasl_oauth_token_provider': None
|
||||
}
|
||||
|
||||
DEPRECATED_CONFIGS = ('buffer_memory',)
|
||||
|
||||
_COMPRESSORS = {
|
||||
'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP),
|
||||
'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY),
|
||||
@@ -434,17 +344,12 @@ class KafkaProducer(object):
|
||||
}
|
||||
|
||||
def __init__(self, **configs):
|
||||
log.debug("Starting the Kafka producer") # trace
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
user_provided_configs = set(configs.keys())
|
||||
for key in self.config:
|
||||
if key in configs:
|
||||
self.config[key] = configs.pop(key)
|
||||
|
||||
for key in self.DEPRECATED_CONFIGS:
|
||||
if key in configs:
|
||||
configs.pop(key)
|
||||
warnings.warn('Deprecated Producer config: %s' % (key,), DeprecationWarning)
|
||||
|
||||
# Only check for extra config keys in top-level class
|
||||
assert not configs, 'Unrecognized configs: %s' % (configs,)
|
||||
|
||||
@@ -462,35 +367,30 @@ class KafkaProducer(object):
|
||||
self.config['api_version'] = None
|
||||
else:
|
||||
self.config['api_version'] = tuple(map(int, deprecated.split('.')))
|
||||
log.warning('%s: use api_version=%s [tuple] -- "%s" as str is deprecated',
|
||||
str(self), str(self.config['api_version']), deprecated)
|
||||
|
||||
log.debug("%s: Starting Kafka producer", str(self))
|
||||
log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
|
||||
str(self.config['api_version']), deprecated)
|
||||
|
||||
# Configure metrics
|
||||
if self.config['metrics_enabled']:
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
else:
|
||||
self._metrics = None
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
|
||||
client = self.config['kafka_client'](
|
||||
metrics=self._metrics, metric_group_prefix='producer',
|
||||
wakeup_timeout_ms=self.config['max_block_ms'],
|
||||
**self.config)
|
||||
client = KafkaClient(metrics=self._metrics, metric_group_prefix='producer',
|
||||
wakeup_timeout_ms=self.config['max_block_ms'],
|
||||
**self.config)
|
||||
|
||||
# Get auto-discovered / normalized version from client
|
||||
self.config['api_version'] = client.config['api_version']
|
||||
# Get auto-discovered version from client if necessary
|
||||
if self.config['api_version'] is None:
|
||||
self.config['api_version'] = client.config['api_version']
|
||||
|
||||
if self.config['compression_type'] == 'lz4':
|
||||
assert self.config['api_version'] >= (0, 8, 2), 'LZ4 Requires >= Kafka 0.8.2 Brokers'
|
||||
|
||||
if self.config['compression_type'] == 'zstd':
|
||||
assert self.config['api_version'] >= (2, 1), 'Zstd Requires >= Kafka 2.1 Brokers'
|
||||
assert self.config['api_version'] >= (2, 1, 0), 'Zstd Requires >= Kafka 2.1.0 Brokers'
|
||||
|
||||
# Check compression_type for library support
|
||||
ct = self.config['compression_type']
|
||||
@@ -501,58 +401,12 @@ class KafkaProducer(object):
|
||||
assert checker(), "Libraries for {} compression codec not found".format(ct)
|
||||
self.config['compression_attrs'] = compression_attrs
|
||||
|
||||
message_version = self._max_usable_produce_magic()
|
||||
self._accumulator = RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config)
|
||||
self._metadata = client.cluster
|
||||
self._transaction_manager = None
|
||||
self._init_transactions_result = None
|
||||
if 'enable_idempotence' in user_provided_configs and not self.config['enable_idempotence'] and self.config['transactional_id']:
|
||||
raise Errors.KafkaConfigurationError("Cannot set transactional_id without enable_idempotence.")
|
||||
|
||||
if self.config['transactional_id']:
|
||||
self.config['enable_idempotence'] = True
|
||||
|
||||
if self.config['enable_idempotence']:
|
||||
assert self.config['api_version'] >= (0, 11), "Transactional/Idempotent producer requires >= Kafka 0.11 Brokers"
|
||||
|
||||
self._transaction_manager = TransactionManager(
|
||||
transactional_id=self.config['transactional_id'],
|
||||
transaction_timeout_ms=self.config['transaction_timeout_ms'],
|
||||
retry_backoff_ms=self.config['retry_backoff_ms'],
|
||||
api_version=self.config['api_version'],
|
||||
metadata=self._metadata,
|
||||
)
|
||||
if self._transaction_manager.is_transactional():
|
||||
log.info("%s: Instantiated a transactional producer.", str(self))
|
||||
else:
|
||||
log.info("%s: Instantiated an idempotent producer.", str(self))
|
||||
|
||||
if self.config['retries'] == 0:
|
||||
raise Errors.KafkaConfigurationError("Must set 'retries' to non-zero when using the idempotent producer.")
|
||||
|
||||
if 'max_in_flight_requests_per_connection' not in user_provided_configs:
|
||||
log.info("%s: Overriding the default 'max_in_flight_requests_per_connection' to 1 since idempontence is enabled.", str(self))
|
||||
self.config['max_in_flight_requests_per_connection'] = 1
|
||||
elif self.config['max_in_flight_requests_per_connection'] != 1:
|
||||
raise Errors.KafkaConfigurationError("Must set 'max_in_flight_requests_per_connection' to 1 in order"
|
||||
" to use the idempotent producer."
|
||||
" Otherwise we cannot guarantee idempotence.")
|
||||
|
||||
if 'acks' not in user_provided_configs:
|
||||
log.info("%s: Overriding the default 'acks' config to 'all' since idempotence is enabled", str(self))
|
||||
self.config['acks'] = -1
|
||||
elif self.config['acks'] != -1:
|
||||
raise Errors.KafkaConfigurationError("Must set 'acks' config to 'all' in order to use the idempotent"
|
||||
" producer. Otherwise we cannot guarantee idempotence")
|
||||
|
||||
message_version = self.max_usable_produce_magic(self.config['api_version'])
|
||||
self._accumulator = RecordAccumulator(
|
||||
transaction_manager=self._transaction_manager,
|
||||
message_version=message_version,
|
||||
**self.config)
|
||||
guarantee_message_order = bool(self.config['max_in_flight_requests_per_connection'] == 1)
|
||||
self._sender = Sender(client, self._metadata,
|
||||
self._accumulator,
|
||||
metrics=self._metrics,
|
||||
transaction_manager=self._transaction_manager,
|
||||
self._accumulator, self._metrics,
|
||||
guarantee_message_order=guarantee_message_order,
|
||||
**self.config)
|
||||
self._sender.daemon = True
|
||||
@@ -561,7 +415,7 @@ class KafkaProducer(object):
|
||||
|
||||
self._cleanup = self._cleanup_factory()
|
||||
atexit.register(self._cleanup)
|
||||
log.debug("%s: Kafka producer started", str(self))
|
||||
log.debug("Kafka producer started")
|
||||
|
||||
def bootstrap_connected(self):
|
||||
"""Return True if the bootstrap is connected."""
|
||||
@@ -572,7 +426,7 @@ class KafkaProducer(object):
|
||||
_self = weakref.proxy(self)
|
||||
def wrapper():
|
||||
try:
|
||||
_self.close(timeout=0, null_logger=True)
|
||||
_self.close(timeout=0)
|
||||
except (ReferenceError, AttributeError):
|
||||
pass
|
||||
return wrapper
|
||||
@@ -595,28 +449,28 @@ class KafkaProducer(object):
|
||||
self._cleanup = None
|
||||
|
||||
def __del__(self):
|
||||
self.close(timeout=1, null_logger=True)
|
||||
# Disable logger during destruction to avoid touching dangling references
|
||||
class NullLogger(object):
|
||||
def __getattr__(self, name):
|
||||
return lambda *args: None
|
||||
|
||||
def close(self, timeout=None, null_logger=False):
|
||||
global log
|
||||
log = NullLogger()
|
||||
|
||||
self.close()
|
||||
|
||||
def close(self, timeout=None):
|
||||
"""Close this producer.
|
||||
|
||||
Arguments:
|
||||
timeout (float, optional): timeout in seconds to wait for completion.
|
||||
"""
|
||||
if null_logger:
|
||||
# Disable logger during destruction to avoid touching dangling references
|
||||
class NullLogger(object):
|
||||
def __getattr__(self, name):
|
||||
return lambda *args: None
|
||||
|
||||
global log
|
||||
log = NullLogger()
|
||||
|
||||
# drop our atexit handler now to avoid leaks
|
||||
self._unregister_cleanup()
|
||||
|
||||
if not hasattr(self, '_closed') or self._closed:
|
||||
log.info('%s: Kafka producer closed', str(self))
|
||||
log.info('Kafka producer closed')
|
||||
return
|
||||
if timeout is None:
|
||||
# threading.TIMEOUT_MAX is available in Python3.3+
|
||||
@@ -626,16 +480,15 @@ class KafkaProducer(object):
|
||||
else:
|
||||
assert timeout >= 0
|
||||
|
||||
log.info("%s: Closing the Kafka producer with %s secs timeout.", str(self), timeout)
|
||||
self.flush(timeout)
|
||||
log.info("Closing the Kafka producer with %s secs timeout.", timeout)
|
||||
invoked_from_callback = bool(threading.current_thread() is self._sender)
|
||||
if timeout > 0:
|
||||
if invoked_from_callback:
|
||||
log.warning("%s: Overriding close timeout %s secs to 0 in order to"
|
||||
log.warning("Overriding close timeout %s secs to 0 in order to"
|
||||
" prevent useless blocking due to self-join. This"
|
||||
" means you have incorrectly invoked close with a"
|
||||
" non-zero timeout from the producer call-back.",
|
||||
str(self), timeout)
|
||||
timeout)
|
||||
else:
|
||||
# Try to close gracefully.
|
||||
if self._sender is not None:
|
||||
@@ -643,13 +496,12 @@ class KafkaProducer(object):
|
||||
self._sender.join(timeout)
|
||||
|
||||
if self._sender is not None and self._sender.is_alive():
|
||||
log.info("%s: Proceeding to force close the producer since pending"
|
||||
log.info("Proceeding to force close the producer since pending"
|
||||
" requests could not be completed within timeout %s.",
|
||||
str(self), timeout)
|
||||
timeout)
|
||||
self._sender.force_close()
|
||||
|
||||
if self._metrics:
|
||||
self._metrics.close()
|
||||
self._metrics.close()
|
||||
try:
|
||||
self.config['key_serializer'].close()
|
||||
except AttributeError:
|
||||
@@ -659,23 +511,23 @@ class KafkaProducer(object):
|
||||
except AttributeError:
|
||||
pass
|
||||
self._closed = True
|
||||
log.debug("%s: The Kafka producer has closed.", str(self))
|
||||
log.debug("The Kafka producer has closed.")
|
||||
|
||||
def partitions_for(self, topic):
|
||||
"""Returns set of all known partitions for the topic."""
|
||||
return self._wait_on_metadata(topic, self.config['max_block_ms'])
|
||||
max_wait = self.config['max_block_ms'] / 1000.0
|
||||
return self._wait_on_metadata(topic, max_wait)
|
||||
|
||||
@classmethod
|
||||
def max_usable_produce_magic(cls, api_version):
|
||||
if api_version >= (0, 11):
|
||||
def _max_usable_produce_magic(self):
|
||||
if self.config['api_version'] >= (0, 11):
|
||||
return 2
|
||||
elif api_version >= (0, 10, 0):
|
||||
elif self.config['api_version'] >= (0, 10):
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def _estimate_size_in_bytes(self, key, value, headers=[]):
|
||||
magic = self.max_usable_produce_magic(self.config['api_version'])
|
||||
magic = self._max_usable_produce_magic()
|
||||
if magic == 2:
|
||||
return DefaultRecordBatchBuilder.estimate_size_in_bytes(
|
||||
key, value, headers)
|
||||
@@ -683,114 +535,6 @@ class KafkaProducer(object):
|
||||
return LegacyRecordBatchBuilder.estimate_size_in_bytes(
|
||||
magic, self.config['compression_type'], key, value)
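The two estimate calls above are static helpers, so the pre-append size check can be sketched in isolation. A hedged illustration, not part of this commit; the key, value and header bytes are made up.

from kafka.record.default_records import DefaultRecordBatchBuilder
from kafka.record.legacy_records import LegacyRecordBatchBuilder

key, value, headers = b'k', b'v' * 100, [('trace-id', b'abc123')]

# Message format v2 (magic 2, brokers >= 0.11): headers count toward the estimate
v2_size = DefaultRecordBatchBuilder.estimate_size_in_bytes(key, value, headers)

# Legacy formats (magic 0/1) take the magic and compression type explicitly
v1_size = LegacyRecordBatchBuilder.estimate_size_in_bytes(1, None, key, value)

print(v2_size, v1_size)  # upper-bound byte estimates checked against max_request_size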
|
||||
|
||||
def init_transactions(self):
|
||||
"""
|
||||
Needs to be called before any other methods when the transactional.id is set in the configuration.
|
||||
|
||||
This method does the following:
|
||||
1. Ensures any transactions initiated by previous instances of the producer with the same
|
||||
transactional_id are completed. If the previous instance had failed with a transaction in
|
||||
progress, it will be aborted. If the last transaction had begun completion,
|
||||
but not yet finished, this method awaits its completion.
|
||||
2. Gets the internal producer id and epoch, used in all future transactional
|
||||
messages issued by the producer.
|
||||
|
||||
Note that this method will raise KafkaTimeoutError if the transactional state cannot
|
||||
be initialized before expiration of `max_block_ms`.
|
||||
|
||||
Retrying after a KafkaTimeoutError will continue to wait for the prior request to succeed or fail.
|
||||
Retrying after any other exception will start a new initialization attempt.
|
||||
Retrying after a successful initialization will do nothing.
|
||||
|
||||
Raises:
|
||||
IllegalStateError: if no transactional_id has been configured
|
||||
AuthorizationError: fatal error indicating that the configured
|
||||
transactional_id is not authorized.
|
||||
KafkaError: if the producer has encountered a previous fatal error or for any other unexpected error
|
||||
KafkaTimeoutError: if the time taken to initialize the transaction has surpassed `max.block.ms`.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot call init_transactions without setting a transactional_id.")
|
||||
if self._init_transactions_result is None:
|
||||
self._init_transactions_result = self._transaction_manager.initialize_transactions()
|
||||
self._sender.wakeup()
|
||||
|
||||
try:
|
||||
if not self._init_transactions_result.wait(timeout_ms=self.config['max_block_ms']):
|
||||
raise Errors.KafkaTimeoutError("Timeout expired while initializing transactional state in %s ms." % (self.config['max_block_ms'],))
|
||||
finally:
|
||||
if self._init_transactions_result.failed:
|
||||
self._init_transactions_result = None
|
||||
|
||||
def begin_transaction(self):
|
||||
""" Should be called before the start of each new transaction.
|
||||
|
||||
Note that prior to the first invocation of this method,
|
||||
you must invoke `init_transactions()` exactly one time.
|
||||
|
||||
Raises:
|
||||
ProducerFencedError: if another producer with the same
transactional_id is active.
|
||||
"""
|
||||
# Set the transactional bit in the producer.
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions")
|
||||
self._transaction_manager.begin_transaction()
|
||||
|
||||
def send_offsets_to_transaction(self, offsets, consumer_group_id):
|
||||
"""
|
||||
Sends a list of consumed offsets to the consumer group coordinator, and also marks
|
||||
those offsets as part of the current transaction. These offsets will be considered
|
||||
consumed only if the transaction is committed successfully.
|
||||
|
||||
This method should be used when you need to batch consumed and produced messages
|
||||
together, typically in a consume-transform-produce pattern.
|
||||
|
||||
Arguments:
|
||||
offsets ({TopicPartition: OffsetAndMetadata}): map of topic-partition -> offsets to commit
|
||||
as part of current transaction.
|
||||
consumer_group_id (str): Name of consumer group for offsets commit.
|
||||
|
||||
Raises:
|
||||
IllegalStateError: if no transactional_id, or transaction has not been started.
|
||||
ProducerFencedError: fatal error indicating another producer with the same transactional_id is active.
|
||||
UnsupportedVersionError: fatal error indicating the broker does not support transactions (i.e. if < 0.11).
|
||||
UnsupportedForMessageFormatError: fatal error indicating the message format used for the offsets
|
||||
topic on the broker does not support transactions.
|
||||
AuthorizationError: fatal error indicating that the configured transactional_id is not authorized.
|
||||
KafkaError: if the producer has encountered a previous fatal or abortable error, or for any
other unexpected error
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions")
|
||||
result = self._transaction_manager.send_offsets_to_transaction(offsets, consumer_group_id)
|
||||
self._sender.wakeup()
|
||||
result.wait()
|
||||
|
||||
def commit_transaction(self):
|
||||
""" Commits the ongoing transaction.
|
||||
|
||||
Raises: ProducerFencedError if another producer with the same
|
||||
transactional_id is active.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot commit transaction since transactions are not enabled")
|
||||
result = self._transaction_manager.begin_commit()
|
||||
self._sender.wakeup()
|
||||
result.wait()
|
||||
|
||||
def abort_transaction(self):
|
||||
""" Aborts the ongoing transaction.
|
||||
|
||||
Raises: ProducerFencedError if another producer with the same
|
||||
transactional_id is active.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot abort transaction since transactions are not enabled.")
|
||||
result = self._transaction_manager.begin_abort()
|
||||
self._sender.wakeup()
|
||||
result.wait()
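Taken together, init_transactions, begin_transaction, send_offsets_to_transaction, commit_transaction and abort_transaction support the consume-transform-produce pattern mentioned above. A hedged sketch, not part of this commit: topics, group id and broker address are placeholders, the transform step is omitted, and the OffsetAndMetadata shape varies slightly across kafka-python versions.

from kafka import KafkaConsumer, KafkaProducer
from kafka.structs import OffsetAndMetadata

consumer = KafkaConsumer('input-topic', group_id='transform-group',
                         bootstrap_servers='localhost:9092',
                         enable_auto_commit=False)
producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         transactional_id='transform-worker-0')

producer.init_transactions()  # once, before the first transaction
while True:
    records_by_partition = consumer.poll(timeout_ms=1000)
    if not records_by_partition:
        continue
    producer.begin_transaction()
    try:
        offsets = {}
        for tp, records in records_by_partition.items():
            for record in records:
                producer.send('output-topic', record.value)  # transform() omitted
            # Newer kafka-python versions add a third leader_epoch field here.
            offsets[tp] = OffsetAndMetadata(records[-1].offset + 1, '')
        producer.send_offsets_to_transaction(offsets, 'transform-group')
        producer.commit_transaction()
    except Exception:
        producer.abort_transaction()
        raise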
|
||||
|
||||
def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None):
|
||||
"""Publish a message to a topic.
|
||||
|
||||
@@ -823,58 +567,44 @@ class KafkaProducer(object):
|
||||
Raises:
|
||||
KafkaTimeoutError: if unable to fetch topic metadata, or unable
|
||||
to obtain memory buffer prior to configured max_block_ms
|
||||
TypeError: if topic is not a string
|
||||
ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
|
||||
AssertionError: if KafkaProducer is closed, or key and value are both None
|
||||
"""
|
||||
assert not self._closed, 'KafkaProducer already closed!'
|
||||
assert value is not None or self.config['api_version'] >= (0, 8, 1), (
|
||||
'Null messages require kafka >= 0.8.1')
|
||||
assert not (value is None and key is None), 'Need at least one: key or value'
|
||||
ensure_valid_topic_name(topic)
|
||||
key_bytes = value_bytes = None
|
||||
timer = Timer(self.config['max_block_ms'], "Failed to assign partition for message in max_block_ms.")
|
||||
try:
|
||||
assigned_partition = None
|
||||
while assigned_partition is None and not timer.expired:
|
||||
self._wait_on_metadata(topic, timer.timeout_ms)
|
||||
self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)
|
||||
|
||||
key_bytes = self._serialize(
|
||||
self.config['key_serializer'],
|
||||
topic, key)
|
||||
value_bytes = self._serialize(
|
||||
self.config['value_serializer'],
|
||||
topic, value)
|
||||
assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
key_bytes = self._serialize(
|
||||
self.config['key_serializer'],
|
||||
topic, key)
|
||||
value_bytes = self._serialize(
|
||||
self.config['value_serializer'],
|
||||
topic, value)
|
||||
assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
|
||||
assigned_partition = self._partition(topic, partition, key, value,
|
||||
key_bytes, value_bytes)
|
||||
if assigned_partition is None:
|
||||
raise Errors.KafkaTimeoutError("Failed to assign partition for message after %s secs." % timer.elapsed_ms / 1000)
|
||||
else:
|
||||
partition = assigned_partition
|
||||
partition = self._partition(topic, partition, key, value,
|
||||
key_bytes, value_bytes)
|
||||
|
||||
if headers is None:
|
||||
headers = []
|
||||
assert isinstance(headers, list)
|
||||
assert all(isinstance(item, tuple) and len(item) == 2 and isinstance(item[0], str) and isinstance(item[1], bytes) for item in headers)
|
||||
assert type(headers) == list
|
||||
assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers)
|
||||
|
||||
message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers)
|
||||
self._ensure_valid_record_size(message_size)
|
||||
|
||||
tp = TopicPartition(topic, partition)
|
||||
log.debug("%s: Sending (key=%r value=%r headers=%r) to %s", str(self), key, value, headers, tp)
|
||||
|
||||
if self._transaction_manager and self._transaction_manager.is_transactional():
|
||||
self._transaction_manager.maybe_add_partition_to_transaction(tp)
|
||||
|
||||
log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp)
|
||||
result = self._accumulator.append(tp, timestamp_ms,
|
||||
key_bytes, value_bytes, headers)
|
||||
key_bytes, value_bytes, headers,
|
||||
self.config['max_block_ms'],
|
||||
estimated_size=message_size)
|
||||
future, batch_is_full, new_batch_created = result
|
||||
if batch_is_full or new_batch_created:
|
||||
log.debug("%s: Waking up the sender since %s is either full or"
|
||||
" getting a new batch", str(self), tp)
|
||||
log.debug("Waking up the sender since %s is either full or"
|
||||
" getting a new batch", tp)
|
||||
self._sender.wakeup()
|
||||
|
||||
return future
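The append path above expects headers as a list of (str, bytes) tuples and runs key/value through the configured serializers before estimating the record size. A usage sketch, not part of this commit: topic, broker address and serializers are placeholders, and record headers require message format v2 (brokers >= 0.11).

import json

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    key_serializer=str.encode,                           # str -> bytes
    value_serializer=lambda v: json.dumps(v).encode(),   # dict -> bytes
)

future = producer.send(
    'events',
    key='user-42',
    value={'action': 'login'},
    headers=[('source', b'web')],   # list of (str, bytes) tuples, as asserted above
)
record_metadata = future.get(timeout=10)
producer.flush()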
|
||||
@@ -882,7 +612,7 @@ class KafkaProducer(object):
|
||||
# for API exceptions return them in the future,
|
||||
# for other exceptions raise directly
|
||||
except Errors.BrokerResponseError as e:
|
||||
log.error("%s: Exception occurred during message send: %s", str(self), e)
|
||||
log.debug("Exception occurred during message send: %s", e)
|
||||
return FutureRecordMetadata(
|
||||
FutureProduceResult(TopicPartition(topic, partition)),
|
||||
-1, None, None,
|
||||
@@ -913,7 +643,7 @@ class KafkaProducer(object):
|
||||
KafkaTimeoutError: failure to flush buffered records within the
|
||||
provided timeout
|
||||
"""
|
||||
log.debug("%s: Flushing accumulated records in producer.", str(self))
|
||||
log.debug("Flushing accumulated records in producer.") # trace
|
||||
self._accumulator.begin_flush()
|
||||
self._sender.wakeup()
|
||||
self._accumulator.await_flush_completion(timeout=timeout)
|
||||
@@ -925,8 +655,13 @@ class KafkaProducer(object):
|
||||
"The message is %d bytes when serialized which is larger than"
|
||||
" the maximum request size you have configured with the"
|
||||
" max_request_size configuration" % (size,))
|
||||
if size > self.config['buffer_memory']:
|
||||
raise Errors.MessageSizeTooLargeError(
|
||||
"The message is %d bytes when serialized which is larger than"
|
||||
" the total memory buffer you have configured with the"
|
||||
" buffer_memory configuration." % (size,))
|
||||
|
||||
def _wait_on_metadata(self, topic, max_wait_ms):
|
||||
def _wait_on_metadata(self, topic, max_wait):
|
||||
"""
|
||||
Wait for cluster metadata including partitions for the given topic to
|
||||
be available.
|
||||
@@ -944,31 +679,32 @@ class KafkaProducer(object):
|
||||
"""
|
||||
# add topic to metadata topic list if it is not there already.
|
||||
self._sender.add_topic(topic)
|
||||
timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
|
||||
begin = time.time()
|
||||
elapsed = 0.0
|
||||
metadata_event = None
|
||||
while True:
|
||||
partitions = self._metadata.partitions_for_topic(topic)
|
||||
if partitions is not None:
|
||||
return partitions
|
||||
timer.maybe_raise()
|
||||
|
||||
if not metadata_event:
|
||||
metadata_event = threading.Event()
|
||||
|
||||
log.debug("%s: Requesting metadata update for topic %s", str(self), topic)
|
||||
log.debug("Requesting metadata update for topic %s", topic)
|
||||
|
||||
metadata_event.clear()
|
||||
future = self._metadata.request_update()
|
||||
future.add_both(lambda e, *args: e.set(), metadata_event)
|
||||
self._sender.wakeup()
|
||||
metadata_event.wait(timer.timeout_ms / 1000)
|
||||
if not future.is_done:
|
||||
metadata_event.wait(max_wait - elapsed)
|
||||
elapsed = time.time() - begin
|
||||
if not metadata_event.is_set():
|
||||
raise Errors.KafkaTimeoutError(
|
||||
"Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
|
||||
elif future.failed() and not future.retriable():
|
||||
raise future.exception
|
||||
"Failed to update metadata after %.1f secs." % (max_wait,))
|
||||
elif topic in self._metadata.unauthorized_topics:
|
||||
raise Errors.TopicAuthorizationFailedError(set([topic]))
|
||||
raise Errors.TopicAuthorizationFailedError(topic)
|
||||
else:
|
||||
log.debug("%s: _wait_on_metadata woke after %s secs.", str(self), timer.elapsed_ms / 1000)
|
||||
log.debug("_wait_on_metadata woke after %s secs.", elapsed)
|
||||
|
||||
def _serialize(self, f, topic, data):
|
||||
if not f:
|
||||
@@ -979,18 +715,16 @@ class KafkaProducer(object):
|
||||
|
||||
def _partition(self, topic, partition, key, value,
|
||||
serialized_key, serialized_value):
|
||||
all_partitions = self._metadata.partitions_for_topic(topic)
|
||||
available = self._metadata.available_partitions_for_topic(topic)
|
||||
if all_partitions is None or available is None:
|
||||
return None
|
||||
if partition is not None:
|
||||
assert partition >= 0
|
||||
assert partition in all_partitions, 'Unrecognized partition'
|
||||
assert partition in self._metadata.partitions_for_topic(topic), 'Unrecognized partition'
|
||||
return partition
|
||||
|
||||
all_partitions = sorted(self._metadata.partitions_for_topic(topic))
|
||||
available = list(self._metadata.available_partitions_for_topic(topic))
|
||||
return self.config['partitioner'](serialized_key,
|
||||
sorted(all_partitions),
|
||||
list(available))
|
||||
all_partitions,
|
||||
available)
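When no explicit partition is given, _partition above defers to the configured partitioner with the serialized key, the full partition list and the currently available partitions. A small sketch of that call, not part of this commit; the partition ids are made up.

from kafka.partitioner.default import DefaultPartitioner

partitioner = DefaultPartitioner()
all_partitions = [0, 1, 2, 3]
available = [0, 1, 3]

# Non-None key: murmur2 hash of the key modulo the total partition count
print(partitioner(b'user-42', all_partitions, available))
# None key: a random pick from the currently available partitions
print(partitioner(None, all_partitions, available))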
|
||||
|
||||
def metrics(self, raw=False):
|
||||
"""Get metrics on producer performance.
|
||||
@@ -1002,8 +736,6 @@ class KafkaProducer(object):
|
||||
This is an unstable interface. It may change in future
|
||||
releases without warning.
|
||||
"""
|
||||
if not self._metrics:
|
||||
return
|
||||
if raw:
|
||||
return self._metrics.metrics.copy()
|
||||
|
||||
@@ -1015,6 +747,3 @@ class KafkaProducer(object):
|
||||
metrics[k.group][k.name] = {}
|
||||
metrics[k.group][k.name] = v.value()
|
||||
return metrics
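metrics() above flattens the metric registry into nested plain dicts keyed by group and name. A short sketch of consuming it, not part of this commit; the broker address is a placeholder and the dict is empty when metrics are disabled.

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
for group, values in producer.metrics().items():
    for name, value in values.items():
        print(group, name, value)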
|
||||
|
||||
def __str__(self):
|
||||
return "<KafkaProducer client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id'])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import, division
|
||||
from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
import copy
|
||||
@@ -6,14 +6,8 @@ import logging
|
||||
import threading
|
||||
import time
|
||||
|
||||
try:
|
||||
# enum in stdlib as of py3.4
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.producer.buffer import SimpleBufferPool
|
||||
from kafka.producer.future import FutureRecordMetadata, FutureProduceResult
|
||||
from kafka.record.memory_records import MemoryRecordsBuilder
|
||||
from kafka.structs import TopicPartition
|
||||
@@ -41,16 +35,10 @@ class AtomicInteger(object):
|
||||
return self._val
|
||||
|
||||
|
||||
class FinalState(IntEnum):
|
||||
ABORTED = 0
|
||||
FAILED = 1
|
||||
SUCCEEDED = 2
|
||||
|
||||
|
||||
class ProducerBatch(object):
|
||||
def __init__(self, tp, records, now=None):
|
||||
now = time.time() if now is None else now
|
||||
def __init__(self, tp, records, buffer):
|
||||
self.max_record_size = 0
|
||||
now = time.time()
|
||||
self.created = now
|
||||
self.drained = None
|
||||
self.attempts = 0
|
||||
@@ -60,120 +48,81 @@ class ProducerBatch(object):
|
||||
self.topic_partition = tp
|
||||
self.produce_future = FutureProduceResult(tp)
|
||||
self._retry = False
|
||||
self._final_state = None
|
||||
|
||||
@property
|
||||
def final_state(self):
|
||||
return self._final_state
|
||||
self._buffer = buffer # We only save it, we don't write to it
|
||||
|
||||
@property
|
||||
def record_count(self):
|
||||
return self.records.next_offset()
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self.records.producer_id if self.records else None
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self.records.producer_epoch if self.records else None
|
||||
|
||||
@property
|
||||
def has_sequence(self):
|
||||
return self.records.has_sequence if self.records else False
|
||||
|
||||
def try_append(self, timestamp_ms, key, value, headers, now=None):
|
||||
def try_append(self, timestamp_ms, key, value, headers):
|
||||
metadata = self.records.append(timestamp_ms, key, value, headers)
|
||||
if metadata is None:
|
||||
return None
|
||||
|
||||
now = time.time() if now is None else now
|
||||
self.max_record_size = max(self.max_record_size, metadata.size)
|
||||
self.last_append = now
|
||||
future = FutureRecordMetadata(
|
||||
self.produce_future,
|
||||
metadata.offset,
|
||||
metadata.timestamp,
|
||||
metadata.crc,
|
||||
len(key) if key is not None else -1,
|
||||
len(value) if value is not None else -1,
|
||||
sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1)
|
||||
self.last_append = time.time()
|
||||
future = FutureRecordMetadata(self.produce_future, metadata.offset,
|
||||
metadata.timestamp, metadata.crc,
|
||||
len(key) if key is not None else -1,
|
||||
len(value) if value is not None else -1,
|
||||
sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1)
|
||||
return future
|
||||
|
||||
def abort(self, exception):
|
||||
"""Abort the batch and complete the future and callbacks."""
|
||||
if self._final_state is not None:
|
||||
raise Errors.IllegalStateError("Batch has already been completed in final state: %s" % self._final_state)
|
||||
self._final_state = FinalState.ABORTED
|
||||
|
||||
log.debug("Aborting batch for partition %s: %s", self.topic_partition, exception)
|
||||
self._complete_future(-1, -1, exception)
|
||||
|
||||
def done(self, base_offset=None, timestamp_ms=None, exception=None):
|
||||
"""
|
||||
Finalize the state of a batch. Final state, once set, is immutable. This function may be called
|
||||
once or twice on a batch. It may be called twice if
|
||||
1. An inflight batch expires before a response from the broker is received. The batch's final
|
||||
state is set to FAILED. But it could succeed on the broker and second time around batch.done() may
|
||||
try to set SUCCEEDED final state.
|
||||
|
||||
2. If a transaction abortion happens or if the producer is closed forcefully, the final state is
|
||||
ABORTED but again it could succeed if broker responds with a success.
|
||||
|
||||
Attempted transitions from [FAILED | ABORTED] --> SUCCEEDED are logged.
|
||||
Attempted transitions from one failure state to the same or a different failed state are ignored.
|
||||
Attempted transitions from SUCCEEDED to the same or a failed state throw an exception.
|
||||
"""
|
||||
final_state = FinalState.SUCCEEDED if exception is None else FinalState.FAILED
|
||||
if self._final_state is None:
|
||||
self._final_state = final_state
|
||||
if final_state is FinalState.SUCCEEDED:
|
||||
log.debug("Successfully produced messages to %s with base offset %s", self.topic_partition, base_offset)
|
||||
else:
|
||||
log.warning("Failed to produce messages to topic-partition %s with base offset %s: %s",
|
||||
self.topic_partition, base_offset, exception)
|
||||
self._complete_future(base_offset, timestamp_ms, exception)
|
||||
return True
|
||||
|
||||
elif self._final_state is not FinalState.SUCCEEDED:
|
||||
if final_state is FinalState.SUCCEEDED:
|
||||
# Log if a previously unsuccessful batch succeeded later on.
|
||||
log.debug("ProduceResponse returned %s for %s after batch with base offset %s had already been %s.",
|
||||
final_state, self.topic_partition, base_offset, self._final_state)
|
||||
else:
|
||||
# FAILED --> FAILED and ABORTED --> FAILED transitions are ignored.
|
||||
log.debug("Ignored state transition %s -> %s for %s batch with base offset %s",
|
||||
self._final_state, final_state, self.topic_partition, base_offset)
|
||||
else:
|
||||
# A SUCCESSFUL batch must not attempt another state change.
|
||||
raise Errors.IllegalStateError("A %s batch must not attempt another state change to %s" % (self._final_state, final_state))
|
||||
return False
|
||||
|
||||
def _complete_future(self, base_offset, timestamp_ms, exception):
|
||||
def done(self, base_offset=None, timestamp_ms=None, exception=None, log_start_offset=None, global_error=None):
|
||||
level = logging.DEBUG if exception is None else logging.WARNING
|
||||
log.log(level, "Produced messages to topic-partition %s with base offset"
|
||||
" %s log start offset %s and error %s.", self.topic_partition, base_offset,
|
||||
log_start_offset, global_error) # trace
|
||||
if self.produce_future.is_done:
|
||||
raise Errors.IllegalStateError('Batch is already closed!')
|
||||
log.warning('Batch is already closed -- ignoring batch.done()')
|
||||
return
|
||||
elif exception is None:
|
||||
self.produce_future.success((base_offset, timestamp_ms))
|
||||
self.produce_future.success((base_offset, timestamp_ms, log_start_offset))
|
||||
else:
|
||||
self.produce_future.failure(exception)
|
||||
|
||||
def has_reached_delivery_timeout(self, delivery_timeout_ms, now=None):
|
||||
now = time.time() if now is None else now
|
||||
return delivery_timeout_ms / 1000 <= now - self.created
|
||||
def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full):
|
||||
"""Expire batches if metadata is not available
|
||||
|
||||
A batch whose metadata is not available should be expired if one
|
||||
of the following is true:
|
||||
|
||||
* the batch is not in retry AND request timeout has elapsed after
|
||||
it is ready (full or linger.ms has reached).
|
||||
|
||||
* the batch is in retry AND request timeout has elapsed after the
|
||||
backoff period ended.
|
||||
"""
|
||||
now = time.time()
|
||||
since_append = now - self.last_append
|
||||
since_ready = now - (self.created + linger_ms / 1000.0)
|
||||
since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
|
||||
timeout = request_timeout_ms / 1000.0
|
||||
|
||||
error = None
|
||||
if not self.in_retry() and is_full and timeout < since_append:
|
||||
error = "%d seconds have passed since last append" % (since_append,)
|
||||
elif not self.in_retry() and timeout < since_ready:
|
||||
error = "%d seconds have passed since batch creation plus linger time" % (since_ready,)
|
||||
elif self.in_retry() and timeout < since_backoff:
|
||||
error = "%d seconds have passed since last attempt plus backoff time" % (since_backoff,)
|
||||
|
||||
if error:
|
||||
self.records.close()
|
||||
self.done(-1, None, Errors.KafkaTimeoutError(
|
||||
"Batch for %s containing %s record(s) expired: %s" % (
|
||||
self.topic_partition, self.records.next_offset(), error)))
|
||||
return True
|
||||
return False
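# Hedged sketch of the three expiry rules evaluated above, written as a pure function so the
# conditions can be tested in isolation (argument names are illustrative, not the library API):
def expiry_reason(now, created, last_append, last_attempt,
                  request_timeout_ms, retry_backoff_ms, linger_ms,
                  is_full, in_retry):
    timeout = request_timeout_ms / 1000.0
    if not in_retry and is_full and timeout < now - last_append:
        return "request timeout elapsed since last append on a full batch"
    if not in_retry and timeout < now - (created + linger_ms / 1000.0):
        return "request timeout elapsed since creation plus linger time"
    if in_retry and timeout < now - (last_attempt + retry_backoff_ms / 1000.0):
        return "request timeout elapsed since last attempt plus backoff"
    return None

# e.g. a full, non-retried batch last appended 60s ago with a 30s request timeout is expired
assert expiry_reason(now=100.0, created=30.0, last_append=40.0, last_attempt=40.0,
                     request_timeout_ms=30000, retry_backoff_ms=100, linger_ms=0,
                     is_full=True, in_retry=False) is not None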
|
||||
def in_retry(self):
|
||||
return self._retry
|
||||
|
||||
def retry(self, now=None):
|
||||
now = time.time() if now is None else now
|
||||
def set_retry(self):
|
||||
self._retry = True
|
||||
self.attempts += 1
|
||||
self.last_attempt = now
|
||||
self.last_append = now
|
||||
|
||||
@property
|
||||
def is_done(self):
|
||||
return self.produce_future.is_done
|
||||
def buffer(self):
|
||||
return self._buffer
|
||||
|
||||
def __str__(self):
|
||||
return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
|
||||
@@ -194,6 +143,12 @@ class RecordAccumulator(object):
|
||||
A small batch size will make batching less common and may reduce
|
||||
throughput (a batch size of zero will disable batching entirely).
|
||||
Default: 16384
|
||||
buffer_memory (int): The total bytes of memory the producer should use
|
||||
to buffer records waiting to be sent to the server. If records are
|
||||
sent faster than they can be delivered to the server the producer
|
||||
will block up to max_block_ms, raising an exception on timeout.
|
||||
In the current implementation, this setting is an approximation.
|
||||
Default: 33554432 (32MB)
|
||||
compression_attrs (int): The compression type for all data generated by
|
||||
the producer. Valid values are gzip(1), snappy(2), lz4(3), or
|
||||
none(0).
|
||||
@@ -201,7 +156,7 @@ class RecordAccumulator(object):
|
||||
will also impact the compression ratio (more batching means better
|
||||
compression). Default: None.
|
||||
linger_ms (int): An artificial delay time to add before declaring a
|
||||
record batch (that isn't full) ready for sending. This allows
|
||||
messageset (that isn't full) ready for sending. This allows
|
||||
time for more records to arrive. Setting a non-zero linger_ms
|
||||
will trade off some latency for potentially better throughput
|
||||
due to more batching (and hence fewer, larger requests).
|
||||
@@ -211,14 +166,14 @@ class RecordAccumulator(object):
|
||||
all retries in a short period of time. Default: 100
|
||||
"""
|
||||
DEFAULT_CONFIG = {
|
||||
'buffer_memory': 33554432,
|
||||
'batch_size': 16384,
|
||||
'compression_attrs': 0,
|
||||
'linger_ms': 0,
|
||||
'request_timeout_ms': 30000,
|
||||
'delivery_timeout_ms': 120000,
|
||||
'retry_backoff_ms': 100,
|
||||
'transaction_manager': None,
|
||||
'message_version': 2,
|
||||
'message_version': 0,
|
||||
'metrics': None,
|
||||
'metric_group_prefix': 'producer-metrics',
|
||||
}
|
||||
|
||||
def __init__(self, **configs):
|
||||
@@ -228,37 +183,22 @@ class RecordAccumulator(object):
|
||||
self.config[key] = configs.pop(key)
|
||||
|
||||
self._closed = False
|
||||
self._transaction_manager = self.config['transaction_manager']
|
||||
self._flushes_in_progress = AtomicInteger()
|
||||
self._appends_in_progress = AtomicInteger()
|
||||
self._batches = collections.defaultdict(collections.deque) # TopicPartition: [ProducerBatch]
|
||||
self._tp_locks = {None: threading.Lock()} # TopicPartition: Lock, plus a lock to add entries
|
||||
self._free = SimpleBufferPool(self.config['buffer_memory'],
|
||||
self.config['batch_size'],
|
||||
metrics=self.config['metrics'],
|
||||
metric_group_prefix=self.config['metric_group_prefix'])
|
||||
self._incomplete = IncompleteProducerBatches()
|
||||
# The following variables should only be accessed by the sender thread,
|
||||
# so we don't need to protect them w/ locking.
|
||||
self.muted = set()
|
||||
self._drain_index = 0
|
||||
self._next_batch_expiry_time_ms = float('inf')
|
||||
|
||||
if self.config['delivery_timeout_ms'] < self.config['linger_ms'] + self.config['request_timeout_ms']:
|
||||
raise Errors.KafkaConfigurationError("Must set delivery_timeout_ms higher than linger_ms + request_timeout_ms")
|
||||
|
||||
@property
|
||||
def delivery_timeout_ms(self):
|
||||
return self.config['delivery_timeout_ms']
|
||||
|
||||
@property
|
||||
def next_expiry_time_ms(self):
|
||||
return self._next_batch_expiry_time_ms
|
||||
|
||||
def _tp_lock(self, tp):
|
||||
if tp not in self._tp_locks:
|
||||
with self._tp_locks[None]:
|
||||
if tp not in self._tp_locks:
|
||||
self._tp_locks[tp] = threading.Lock()
|
||||
return self._tp_locks[tp]
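# The helper above lazily creates one lock per partition, using the special None key as a
# guard lock so two threads cannot both insert a lock for the same partition. A standalone
# sketch of the same double-checked pattern (the dictionary and function names are illustrative):
import threading

_locks = {None: threading.Lock()}

def lock_for(key):
    if key not in _locks:
        with _locks[None]:          # guard lock serializes lock creation
            if key not in _locks:   # re-check after acquiring the guard
                _locks[key] = threading.Lock()
    return _locks[key]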
|
||||
def append(self, tp, timestamp_ms, key, value, headers, now=None):
|
||||
def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms,
|
||||
estimated_size=0):
|
||||
"""Add a record to the accumulator, return the append result.
|
||||
|
||||
The append result will contain the future metadata, and flag for
|
||||
@@ -271,53 +211,59 @@ class RecordAccumulator(object):
|
||||
key (bytes): The key for the record
|
||||
value (bytes): The value for the record
|
||||
headers (List[Tuple[str, bytes]]): The header fields for the record
|
||||
max_time_to_block_ms (int): The maximum time in milliseconds to
|
||||
block for buffer memory to be available
|
||||
|
||||
Returns:
|
||||
tuple: (future, batch_is_full, new_batch_created)
|
||||
"""
|
||||
assert isinstance(tp, TopicPartition), 'not TopicPartition'
|
||||
assert not self._closed, 'RecordAccumulator is closed'
|
||||
now = time.time() if now is None else now
|
||||
# We keep track of the number of appending threads to make sure we do
# not miss batches in abort_incomplete_batches().
|
||||
self._appends_in_progress.increment()
|
||||
try:
|
||||
with self._tp_lock(tp):
|
||||
if tp not in self._tp_locks:
|
||||
with self._tp_locks[None]:
|
||||
if tp not in self._tp_locks:
|
||||
self._tp_locks[tp] = threading.Lock()
|
||||
|
||||
with self._tp_locks[tp]:
|
||||
# check if we have an in-progress batch
|
||||
dq = self._batches[tp]
|
||||
if dq:
|
||||
last = dq[-1]
|
||||
future = last.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
future = last.try_append(timestamp_ms, key, value, headers)
|
||||
if future is not None:
|
||||
batch_is_full = len(dq) > 1 or last.records.is_full()
|
||||
return future, batch_is_full, False
|
||||
|
||||
with self._tp_lock(tp):
|
||||
size = max(self.config['batch_size'], estimated_size)
|
||||
log.debug("Allocating a new %d byte message buffer for %s", size, tp) # trace
|
||||
buf = self._free.allocate(size, max_time_to_block_ms)
|
||||
with self._tp_locks[tp]:
|
||||
# Need to check if producer is closed again after grabbing the
|
||||
# dequeue lock.
|
||||
assert not self._closed, 'RecordAccumulator is closed'
|
||||
|
||||
if dq:
|
||||
last = dq[-1]
|
||||
future = last.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
future = last.try_append(timestamp_ms, key, value, headers)
|
||||
if future is not None:
|
||||
# Somebody else found us a batch, return the one we
|
||||
# waited for! Hopefully this doesn't happen often...
|
||||
self._free.deallocate(buf)
|
||||
batch_is_full = len(dq) > 1 or last.records.is_full()
|
||||
return future, batch_is_full, False
|
||||
|
||||
if self._transaction_manager and self.config['message_version'] < 2:
|
||||
raise Errors.UnsupportedVersionError("Attempting to use idempotence with a broker which"
|
||||
" does not support the required message format (v2)."
|
||||
" The broker must be version 0.11 or later.")
|
||||
records = MemoryRecordsBuilder(
|
||||
self.config['message_version'],
|
||||
self.config['compression_attrs'],
|
||||
self.config['batch_size']
|
||||
)
|
||||
|
||||
batch = ProducerBatch(tp, records, now=now)
|
||||
future = batch.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
batch = ProducerBatch(tp, records, buf)
|
||||
future = batch.try_append(timestamp_ms, key, value, headers)
|
||||
if not future:
|
||||
raise Exception()
|
||||
|
||||
@@ -328,43 +274,79 @@ class RecordAccumulator(object):
|
||||
finally:
|
||||
self._appends_in_progress.decrement()
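# A hedged usage sketch for the (future, batch_is_full, new_batch_created) tuple returned by
# append(), following the signature variant that takes max_time_to_block_ms. The `accumulator`
# and `sender` arguments are placeholders for whatever owns these objects in real code, not
# names defined in this module:
def send_one(accumulator, sender, tp, timestamp_ms, key, value, max_block_ms=60000):
    future, batch_is_full, new_batch_created = accumulator.append(
        tp, timestamp_ms, key, value, headers=[], max_time_to_block_ms=max_block_ms)
    if batch_is_full or new_batch_created:
        # a full (or brand-new) batch means there is work ready for the I/O thread
        sender.wakeup()
    return future  # resolves to produce metadata on success, or to an exception on failure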
|
||||
def reset_next_batch_expiry_time(self):
|
||||
self._next_batch_expiry_time_ms = float('inf')
|
||||
def abort_expired_batches(self, request_timeout_ms, cluster):
|
||||
"""Abort the batches that have been sitting in RecordAccumulator for
|
||||
more than the configured request_timeout due to metadata being
|
||||
unavailable.
|
||||
|
||||
def maybe_update_next_batch_expiry_time(self, batch):
|
||||
self._next_batch_expiry_time_ms = min(self._next_batch_expiry_time_ms, batch.created * 1000 + self.delivery_timeout_ms)
|
||||
Arguments:
|
||||
request_timeout_ms (int): milliseconds to timeout
|
||||
cluster (ClusterMetadata): current metadata for kafka cluster
|
||||
|
||||
def expired_batches(self, now=None):
|
||||
"""Get a list of batches which have been sitting in the accumulator too long and need to be expired."""
|
||||
Returns:
|
||||
list of ProducerBatch that were expired
|
||||
"""
|
||||
expired_batches = []
|
||||
to_remove = []
|
||||
count = 0
|
||||
for tp in list(self._batches.keys()):
|
||||
with self._tp_lock(tp):
|
||||
assert tp in self._tp_locks, 'TopicPartition not in locks dict'
|
||||
|
||||
# We only check whether the batch should be expired if the partition
# does not have a batch in flight. This avoids expiring later
# batches while an earlier batch is still in progress. This
# protection only takes effect when the user sets
# max.in.flight.requests.per.connection=1. Otherwise the expiration
# order is not guaranteed.
|
||||
if tp in self.muted:
|
||||
continue
|
||||
|
||||
with self._tp_locks[tp]:
|
||||
# iterate over the batches and expire them if they have stayed
|
||||
# in accumulator for more than request_timeout_ms
|
||||
dq = self._batches[tp]
|
||||
while dq:
|
||||
batch = dq[0]
|
||||
if batch.has_reached_delivery_timeout(self.delivery_timeout_ms, now=now):
|
||||
dq.popleft()
|
||||
batch.records.close()
|
||||
for batch in dq:
|
||||
is_full = bool(bool(batch != dq[-1]) or batch.records.is_full())
|
||||
# check if the batch is expired
|
||||
if batch.maybe_expire(request_timeout_ms,
|
||||
self.config['retry_backoff_ms'],
|
||||
self.config['linger_ms'],
|
||||
is_full):
|
||||
expired_batches.append(batch)
|
||||
to_remove.append(batch)
|
||||
count += 1
|
||||
self.deallocate(batch)
|
||||
else:
|
||||
# Stop at the first batch that has not expired.
|
||||
self.maybe_update_next_batch_expiry_time(batch)
|
||||
break
|
||||
|
||||
# Python does not allow us to mutate the dq during iteration
|
||||
# Assuming expired batches are infrequent, this is better than
|
||||
# creating a new copy of the deque for iteration on every loop
|
||||
if to_remove:
|
||||
for batch in to_remove:
|
||||
dq.remove(batch)
|
||||
to_remove = []
|
||||
|
||||
if expired_batches:
|
||||
log.warning("Expired %d batches in accumulator", count) # trace
|
||||
|
||||
return expired_batches
|
||||
|
||||
def reenqueue(self, batch, now=None):
|
||||
"""
|
||||
Re-enqueue the given record batch in the accumulator. Sender._complete_batch already checks
whether the batch has reached delivery_timeout_ms, so we do not repeat the delivery timeout check here.
|
||||
"""
|
||||
batch.retry(now=now)
|
||||
with self._tp_lock(batch.topic_partition):
|
||||
dq = self._batches[batch.topic_partition]
|
||||
def reenqueue(self, batch):
|
||||
"""Re-enqueue the given record batch in the accumulator to retry."""
|
||||
now = time.time()
|
||||
batch.attempts += 1
|
||||
batch.last_attempt = now
|
||||
batch.last_append = now
|
||||
batch.set_retry()
|
||||
assert batch.topic_partition in self._tp_locks, 'TopicPartition not in locks dict'
|
||||
assert batch.topic_partition in self._batches, 'TopicPartition not in batches'
|
||||
dq = self._batches[batch.topic_partition]
|
||||
with self._tp_locks[batch.topic_partition]:
|
||||
dq.appendleft(batch)
|
||||
|
||||
def ready(self, cluster, now=None):
|
||||
def ready(self, cluster):
|
||||
"""
|
||||
Get a list of nodes whose partitions are ready to be sent, and the
|
||||
earliest time at which any non-sendable partition will be ready;
|
||||
@@ -398,8 +380,9 @@ class RecordAccumulator(object):
|
||||
ready_nodes = set()
|
||||
next_ready_check = 9999999.99
|
||||
unknown_leaders_exist = False
|
||||
now = time.time() if now is None else now
|
||||
now = time.time()
|
||||
|
||||
exhausted = bool(self._free.queued() > 0)
|
||||
# several threads are accessing self._batches -- to simplify
|
||||
# concurrent access, we iterate over a snapshot of partitions
|
||||
# and lock each partition separately as needed
|
||||
@@ -414,23 +397,23 @@ class RecordAccumulator(object):
|
||||
elif tp in self.muted:
|
||||
continue
|
||||
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if not dq:
|
||||
continue
|
||||
batch = dq[0]
|
||||
retry_backoff = self.config['retry_backoff_ms'] / 1000
|
||||
linger = self.config['linger_ms'] / 1000
|
||||
backing_off = bool(batch.attempts > 0
|
||||
and (batch.last_attempt + retry_backoff) > now)
|
||||
retry_backoff = self.config['retry_backoff_ms'] / 1000.0
|
||||
linger = self.config['linger_ms'] / 1000.0
|
||||
backing_off = bool(batch.attempts > 0 and
|
||||
batch.last_attempt + retry_backoff > now)
|
||||
waited_time = now - batch.last_attempt
|
||||
time_to_wait = retry_backoff if backing_off else linger
|
||||
time_left = max(time_to_wait - waited_time, 0)
|
||||
full = bool(len(dq) > 1 or batch.records.is_full())
|
||||
expired = bool(waited_time >= time_to_wait)
|
||||
|
||||
sendable = (full or expired or self._closed or
|
||||
self.flush_in_progress())
|
||||
sendable = (full or expired or exhausted or self._closed or
|
||||
self._flush_in_progress())
|
||||
|
||||
if sendable and not backing_off:
|
||||
ready_nodes.add(leader)
|
||||
@@ -444,98 +427,16 @@ class RecordAccumulator(object):
|
||||
|
||||
return ready_nodes, next_ready_check, unknown_leaders_exist
|
||||
|
||||
def has_undrained(self):
|
||||
"""Check whether there are any batches which haven't been drained"""
|
||||
def has_unsent(self):
|
||||
"""Return whether there is any unsent record in the accumulator."""
|
||||
for tp in list(self._batches.keys()):
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if len(dq):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _should_stop_drain_batches_for_partition(self, first, tp):
|
||||
if self._transaction_manager:
|
||||
if not self._transaction_manager.is_send_to_partition_allowed(tp):
|
||||
return True
|
||||
if not self._transaction_manager.producer_id_and_epoch.is_valid:
|
||||
# we cannot send the batch until we have refreshed the PID
|
||||
log.debug("Waiting to send ready batches because transaction producer id is not valid")
|
||||
return True
|
||||
return False
|
||||
|
||||
def drain_batches_for_one_node(self, cluster, node_id, max_size, now=None):
|
||||
now = time.time() if now is None else now
|
||||
size = 0
|
||||
ready = []
|
||||
partitions = list(cluster.partitions_for_broker(node_id))
|
||||
if not partitions:
|
||||
return ready
|
||||
# to make starvation less likely this loop doesn't start at 0
|
||||
self._drain_index %= len(partitions)
|
||||
start = None
|
||||
while start != self._drain_index:
|
||||
tp = partitions[self._drain_index]
|
||||
if start is None:
|
||||
start = self._drain_index
|
||||
self._drain_index += 1
|
||||
self._drain_index %= len(partitions)
|
||||
|
||||
# Only proceed if the partition has no in-flight batches.
|
||||
if tp in self.muted:
|
||||
continue
|
||||
|
||||
if tp not in self._batches:
|
||||
continue
|
||||
|
||||
with self._tp_lock(tp):
|
||||
dq = self._batches[tp]
|
||||
if len(dq) == 0:
|
||||
continue
|
||||
first = dq[0]
|
||||
backoff = bool(first.attempts > 0 and
|
||||
first.last_attempt + self.config['retry_backoff_ms'] / 1000 > now)
|
||||
# Only drain the batch if it is not during backoff
|
||||
if backoff:
|
||||
continue
|
||||
|
||||
if (size + first.records.size_in_bytes() > max_size
|
||||
and len(ready) > 0):
|
||||
# there is a rare case that a single batch
|
||||
# size is larger than the request size due
|
||||
# to compression; in this case we will
|
||||
# still eventually send this batch in a
|
||||
# single request
|
||||
break
|
||||
else:
|
||||
if self._should_stop_drain_batches_for_partition(first, tp):
|
||||
break
|
||||
|
||||
batch = dq.popleft()
|
||||
if self._transaction_manager and not batch.in_retry():
|
||||
# If the batch is in retry, then we should not change the pid and
|
||||
# sequence number, since this may introduce duplicates. In particular,
|
||||
# the previous attempt may actually have been accepted, and if we change
|
||||
# the pid and sequence here, this attempt will also be accepted, causing
|
||||
# a duplicate.
|
||||
sequence_number = self._transaction_manager.sequence_number(batch.topic_partition)
|
||||
log.debug("Dest: %s: %s producer_id=%s epoch=%s sequence=%s",
|
||||
node_id, batch.topic_partition,
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch,
|
||||
sequence_number)
|
||||
batch.records.set_producer_state(
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch,
|
||||
sequence_number,
|
||||
self._transaction_manager.is_transactional()
|
||||
)
|
||||
batch.records.close()
|
||||
size += batch.records.size_in_bytes()
|
||||
ready.append(batch)
|
||||
batch.drained = now
|
||||
return ready
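# The drain loop above resumes from a rotating index so the same partition is not always
# drained first. A tiny standalone illustration of that round-robin walk (pure Python,
# names here are illustrative):
def round_robin_walk(items, start_index):
    """Yield items starting at start_index, wrapping around exactly once."""
    n = len(items)
    for i in range(n):
        yield items[(start_index + i) % n]

assert list(round_robin_walk(['p0', 'p1', 'p2'], 1)) == ['p1', 'p2', 'p0']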
|
||||
def drain(self, cluster, nodes, max_size, now=None):
|
||||
def drain(self, cluster, nodes, max_size):
|
||||
"""
|
||||
Drain all the data for the given nodes and collate them into a list of
|
||||
batches that will fit within the specified size on a per-node basis.
|
||||
@@ -553,17 +454,59 @@ class RecordAccumulator(object):
|
||||
if not nodes:
|
||||
return {}
|
||||
|
||||
now = time.time() if now is None else now
|
||||
now = time.time()
|
||||
batches = {}
|
||||
for node_id in nodes:
|
||||
batches[node_id] = self.drain_batches_for_one_node(cluster, node_id, max_size, now=now)
|
||||
size = 0
|
||||
partitions = list(cluster.partitions_for_broker(node_id))
|
||||
ready = []
|
||||
# to make starvation less likely this loop doesn't start at 0
|
||||
self._drain_index %= len(partitions)
|
||||
start = self._drain_index
|
||||
while True:
|
||||
tp = partitions[self._drain_index]
|
||||
if tp in self._batches and tp not in self.muted:
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if dq:
|
||||
first = dq[0]
|
||||
backoff = (
|
||||
bool(first.attempts > 0) and
|
||||
bool(first.last_attempt +
|
||||
self.config['retry_backoff_ms'] / 1000.0
|
||||
> now)
|
||||
)
|
||||
# Only drain the batch if it is not during backoff
|
||||
if not backoff:
|
||||
if (size + first.records.size_in_bytes() > max_size
|
||||
and len(ready) > 0):
|
||||
# there is a rare case that a single batch
|
||||
# size is larger than the request size due
|
||||
# to compression; in this case we will
|
||||
# still eventually send this batch in a
|
||||
# single request
|
||||
break
|
||||
else:
|
||||
batch = dq.popleft()
|
||||
batch.records.close()
|
||||
size += batch.records.size_in_bytes()
|
||||
ready.append(batch)
|
||||
batch.drained = now
|
||||
|
||||
self._drain_index += 1
|
||||
self._drain_index %= len(partitions)
|
||||
if start == self._drain_index:
|
||||
break
|
||||
|
||||
batches[node_id] = ready
|
||||
return batches
|
||||
|
||||
def deallocate(self, batch):
|
||||
"""Deallocate the record batch."""
|
||||
self._incomplete.remove(batch)
|
||||
self._free.deallocate(batch.buffer())
|
||||
|
||||
def flush_in_progress(self):
|
||||
def _flush_in_progress(self):
|
||||
"""Are there any threads currently waiting on a flush?"""
|
||||
return self._flushes_in_progress.get() > 0
|
||||
|
||||
@@ -592,10 +535,6 @@ class RecordAccumulator(object):
|
||||
finally:
|
||||
self._flushes_in_progress.decrement()
|
||||
|
||||
@property
|
||||
def has_incomplete(self):
|
||||
return bool(self._incomplete)
|
||||
|
||||
def abort_incomplete_batches(self):
|
||||
"""
|
||||
This function is only called when sender is closed forcefully. It will fail all the
|
||||
@@ -605,41 +544,27 @@ class RecordAccumulator(object):
|
||||
# 1. Avoid losing batches.
|
||||
# 2. Free up memory in case appending threads are blocked on buffer full.
|
||||
# This is a tight loop but should be able to get through very quickly.
|
||||
error = Errors.IllegalStateError("Producer is closed forcefully.")
|
||||
while True:
|
||||
self._abort_batches(error)
|
||||
self._abort_batches()
|
||||
if not self._appends_in_progress.get():
|
||||
break
|
||||
# After this point, no thread will append any messages because they will see the close
# flag set. We need to do one last abort after no thread is appending, in case there was a new
# batch appended by the last appending thread.
|
||||
self._abort_batches(error)
|
||||
self._abort_batches()
|
||||
self._batches.clear()
|
||||
|
||||
def _abort_batches(self, error):
|
||||
def _abort_batches(self):
|
||||
"""Go through incomplete batches and abort them."""
|
||||
error = Errors.IllegalStateError("Producer is closed forcefully.")
|
||||
for batch in self._incomplete.all():
|
||||
tp = batch.topic_partition
|
||||
# Close the batch before aborting
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
batch.records.close()
|
||||
self._batches[tp].remove(batch)
|
||||
batch.abort(error)
|
||||
batch.done(exception=error)
|
||||
self.deallocate(batch)
|
||||
|
||||
def abort_undrained_batches(self, error):
|
||||
for batch in self._incomplete.all():
|
||||
tp = batch.topic_partition
|
||||
with self._tp_lock(tp):
|
||||
aborted = False
|
||||
if not batch.is_done:
|
||||
aborted = True
|
||||
batch.records.close()
|
||||
self._batches[tp].remove(batch)
|
||||
if aborted:
|
||||
batch.abort(error)
|
||||
self.deallocate(batch)
|
||||
|
||||
def close(self):
|
||||
"""Close this accumulator and force all the record buffers to be drained."""
|
||||
self._closed = True
|
||||
@@ -654,21 +579,12 @@ class IncompleteProducerBatches(object):
|
||||
|
||||
def add(self, batch):
|
||||
with self._lock:
|
||||
self._incomplete.add(batch)
|
||||
return self._incomplete.add(batch)
|
||||
|
||||
def remove(self, batch):
|
||||
with self._lock:
|
||||
try:
|
||||
self._incomplete.remove(batch)
|
||||
except KeyError:
|
||||
pass
|
||||
return self._incomplete.remove(batch)
|
||||
|
||||
def all(self):
|
||||
with self._lock:
|
||||
return list(self._incomplete)
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self._incomplete)
|
||||
|
||||
|
||||
__nonzero__ = __bool__
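# For reference, a minimal thread-safe "incomplete batches" container along the lines of the
# class above (a sketch only, not the library class; the name ThreadSafeSet is made up):
import threading

class ThreadSafeSet(object):
    def __init__(self):
        self._items = set()
        self._lock = threading.Lock()

    def add(self, item):
        with self._lock:
            self._items.add(item)

    def remove(self, item):
        with self._lock:
            self._items.remove(item)   # raises KeyError if absent, mirroring the new behavior

    def all(self):
        with self._lock:
            return list(self._items)

    def __bool__(self):
        return bool(self._items)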
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import, division
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import heapq
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
@@ -12,8 +11,6 @@ from kafka.vendor import six
|
||||
from kafka import errors as Errors
|
||||
from kafka.metrics.measurable import AnonMeasurable
|
||||
from kafka.metrics.stats import Avg, Max, Rate
|
||||
from kafka.producer.transaction_manager import ProducerIdAndEpoch
|
||||
from kafka.protocol.init_producer_id import InitProducerIdRequest
|
||||
from kafka.protocol.produce import ProduceRequest
|
||||
from kafka.structs import TopicPartition
|
||||
from kafka.version import __version__
|
||||
@@ -30,18 +27,14 @@ class Sender(threading.Thread):
|
||||
DEFAULT_CONFIG = {
|
||||
'max_request_size': 1048576,
|
||||
'acks': 1,
|
||||
'retries': float('inf'),
|
||||
'retries': 0,
|
||||
'request_timeout_ms': 30000,
|
||||
'retry_backoff_ms': 100,
|
||||
'metrics': None,
|
||||
'guarantee_message_order': False,
|
||||
'transaction_manager': None,
|
||||
'transactional_id': None,
|
||||
'transaction_timeout_ms': 60000,
|
||||
'client_id': 'kafka-python-' + __version__,
|
||||
'api_version': (0, 8, 0),
|
||||
}
|
||||
|
||||
def __init__(self, client, metadata, accumulator, **configs):
|
||||
def __init__(self, client, metadata, accumulator, metrics, **configs):
|
||||
super(Sender, self).__init__()
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
for key in self.config:
|
||||
@@ -55,75 +48,32 @@ class Sender(threading.Thread):
|
||||
self._running = True
|
||||
self._force_close = False
|
||||
self._topics_to_add = set()
|
||||
if self.config['metrics']:
|
||||
self._sensors = SenderMetrics(self.config['metrics'], self._client, self._metadata)
|
||||
else:
|
||||
self._sensors = None
|
||||
self._transaction_manager = self.config['transaction_manager']
|
||||
# A per-partition queue of batches ordered by creation time for tracking the in-flight batches
|
||||
self._in_flight_batches = collections.defaultdict(list)
|
||||
|
||||
def _maybe_remove_from_inflight_batches(self, batch):
|
||||
try:
|
||||
queue = self._in_flight_batches[batch.topic_partition]
|
||||
except KeyError:
|
||||
return
|
||||
try:
|
||||
idx = queue.index((batch.created, batch))
|
||||
except ValueError:
|
||||
return
|
||||
# https://stackoverflow.com/questions/10162679/python-delete-element-from-heap
|
||||
queue[idx] = queue[-1]
|
||||
queue.pop()
|
||||
heapq.heapify(queue)
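# Standalone illustration of the "swap with last element, pop, re-heapify" trick used above to
# delete an arbitrary entry from a heap (a sketch; the index lookup makes it O(n)):
import heapq

def heap_remove(heap, item):
    idx = heap.index(item)        # raises ValueError if the item is not present
    heap[idx] = heap[-1]
    heap.pop()
    if idx < len(heap):
        heapq.heapify(heap)

h = [1, 3, 5, 7, 9]
heapq.heapify(h)
heap_remove(h, 5)
assert sorted(h) == [1, 3, 7, 9]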
|
||||
def _get_expired_inflight_batches(self, now=None):
|
||||
"""Get the in-flight batches that has reached delivery timeout."""
|
||||
expired_batches = []
|
||||
to_remove = []
|
||||
for tp, queue in six.iteritems(self._in_flight_batches):
|
||||
while queue:
|
||||
_created_at, batch = queue[0]
|
||||
if batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms):
|
||||
heapq.heappop(queue)
|
||||
if batch.final_state is None:
|
||||
expired_batches.append(batch)
|
||||
else:
|
||||
raise Errors.IllegalStateError("%s batch created at %s gets unexpected final state %s" % (batch.topic_partition, batch.created, batch.final_state))
|
||||
else:
|
||||
self._accumulator.maybe_update_next_batch_expiry_time(batch)
|
||||
break
|
||||
else:
|
||||
# Avoid mutating in_flight_batches during iteration
|
||||
to_remove.append(tp)
|
||||
for tp in to_remove:
|
||||
del self._in_flight_batches[tp]
|
||||
return expired_batches
|
||||
self._sensors = SenderMetrics(metrics, self._client, self._metadata)
|
||||
|
||||
def run(self):
|
||||
"""The main run loop for the sender thread."""
|
||||
log.debug("%s: Starting Kafka producer I/O thread.", str(self))
|
||||
log.debug("Starting Kafka producer I/O thread.")
|
||||
|
||||
# main loop, runs until close is called
|
||||
while self._running:
|
||||
try:
|
||||
self.run_once()
|
||||
except Exception:
|
||||
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
|
||||
log.exception("Uncaught error in kafka producer I/O thread")
|
||||
|
||||
log.debug("%s: Beginning shutdown of Kafka producer I/O thread, sending"
|
||||
" remaining records.", str(self))
|
||||
log.debug("Beginning shutdown of Kafka producer I/O thread, sending"
|
||||
" remaining records.")
|
||||
|
||||
# okay we stopped accepting requests but there may still be
|
||||
# requests in the accumulator or waiting for acknowledgment,
|
||||
# wait until these are completed.
|
||||
while (not self._force_close
|
||||
and (self._accumulator.has_undrained()
|
||||
and (self._accumulator.has_unsent()
|
||||
or self._client.in_flight_request_count() > 0)):
|
||||
try:
|
||||
self.run_once()
|
||||
except Exception:
|
||||
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
|
||||
log.exception("Uncaught error in kafka producer I/O thread")
|
||||
|
||||
if self._force_close:
|
||||
# We need to fail all the incomplete batches and wake up the
|
||||
@@ -133,75 +83,38 @@ class Sender(threading.Thread):
|
||||
try:
|
||||
self._client.close()
|
||||
except Exception:
|
||||
log.exception("%s: Failed to close network client", str(self))
|
||||
log.exception("Failed to close network client")
|
||||
|
||||
log.debug("%s: Shutdown of Kafka producer I/O thread has completed.", str(self))
|
||||
log.debug("Shutdown of Kafka producer I/O thread has completed.")
|
||||
|
||||
def run_once(self):
|
||||
"""Run a single iteration of sending."""
|
||||
while self._topics_to_add:
|
||||
self._client.add_topic(self._topics_to_add.pop())
|
||||
|
||||
if self._transaction_manager:
|
||||
try:
|
||||
if not self._transaction_manager.is_transactional():
|
||||
# this is an idempotent producer, so make sure we have a producer id
|
||||
self._maybe_wait_for_producer_id()
|
||||
elif self._transaction_manager.has_in_flight_transactional_request() or self._maybe_send_transactional_request():
|
||||
# as long as there are outstanding transactional requests, we simply wait for them to return
|
||||
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
|
||||
return
|
||||
|
||||
# do not continue sending if the transaction manager is in a failed state or if there
|
||||
# is no producer id (for the idempotent case).
|
||||
if self._transaction_manager.has_fatal_error() or not self._transaction_manager.has_producer_id():
|
||||
last_error = self._transaction_manager.last_error
|
||||
if last_error is not None:
|
||||
self._maybe_abort_batches(last_error)
|
||||
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
|
||||
return
|
||||
elif self._transaction_manager.has_abortable_error():
|
||||
self._accumulator.abort_undrained_batches(self._transaction_manager.last_error)
|
||||
|
||||
except Errors.SaslAuthenticationFailedError as e:
|
||||
# This is already logged as error, but propagated here to perform any clean ups.
|
||||
log.debug("%s: Authentication exception while processing transactional request: %s", str(self), e)
|
||||
self._transaction_manager.authentication_failed(e)
|
||||
|
||||
poll_timeout_ms = self._send_producer_data()
|
||||
self._client.poll(timeout_ms=poll_timeout_ms)
|
||||
|
||||
def _send_producer_data(self, now=None):
|
||||
now = time.time() if now is None else now
|
||||
# get the list of partitions with data ready to send
|
||||
result = self._accumulator.ready(self._metadata, now=now)
|
||||
result = self._accumulator.ready(self._metadata)
|
||||
ready_nodes, next_ready_check_delay, unknown_leaders_exist = result
|
||||
|
||||
# if there are any partitions whose leaders are not known yet, force
|
||||
# metadata update
|
||||
if unknown_leaders_exist:
|
||||
log.debug('%s: Unknown leaders exist, requesting metadata update', str(self))
|
||||
log.debug('Unknown leaders exist, requesting metadata update')
|
||||
self._metadata.request_update()
|
||||
|
||||
# remove any nodes we aren't ready to send to
|
||||
not_ready_timeout_ms = float('inf')
|
||||
not_ready_timeout = float('inf')
|
||||
for node in list(ready_nodes):
|
||||
if not self._client.is_ready(node):
|
||||
node_delay_ms = self._client.connection_delay(node)
|
||||
log.debug('%s: Node %s not ready; delaying produce of accumulated batch (%f ms)', str(self), node, node_delay_ms)
|
||||
log.debug('Node %s not ready; delaying produce of accumulated batch', node)
|
||||
self._client.maybe_connect(node, wakeup=False)
|
||||
ready_nodes.remove(node)
|
||||
not_ready_timeout_ms = min(not_ready_timeout_ms, node_delay_ms)
|
||||
not_ready_timeout = min(not_ready_timeout,
|
||||
self._client.connection_delay(node))
|
||||
|
||||
# create produce requests
|
||||
batches_by_node = self._accumulator.drain(
|
||||
self._metadata, ready_nodes, self.config['max_request_size'], now=now)
|
||||
|
||||
for batch_list in six.itervalues(batches_by_node):
|
||||
for batch in batch_list:
|
||||
item = (batch.created, batch)
|
||||
queue = self._in_flight_batches[batch.topic_partition]
|
||||
heapq.heappush(queue, item)
|
||||
self._metadata, ready_nodes, self.config['max_request_size'])
|
||||
|
||||
if self.config['guarantee_message_order']:
|
||||
# Mute all the partitions drained
|
||||
@@ -209,130 +122,42 @@ class Sender(threading.Thread):
|
||||
for batch in batch_list:
|
||||
self._accumulator.muted.add(batch.topic_partition)
|
||||
|
||||
self._accumulator.reset_next_batch_expiry_time()
|
||||
expired_batches = self._accumulator.expired_batches(now=now)
|
||||
expired_batches.extend(self._get_expired_inflight_batches(now=now))
|
||||
|
||||
if expired_batches:
|
||||
log.debug("%s: Expired %s batches in accumulator", str(self), len(expired_batches))
|
||||
|
||||
# Reset the producer_id if an expired batch has previously been sent to the broker.
|
||||
# See the documentation of `TransactionState.reset_producer_id` to understand why
|
||||
# we need to reset the producer id here.
|
||||
if self._transaction_manager and any([batch.in_retry() for batch in expired_batches]):
|
||||
needs_transaction_state_reset = True
|
||||
else:
|
||||
needs_transaction_state_reset = False
|
||||
|
||||
expired_batches = self._accumulator.abort_expired_batches(
|
||||
self.config['request_timeout_ms'], self._metadata)
|
||||
for expired_batch in expired_batches:
|
||||
error = Errors.KafkaTimeoutError(
|
||||
"Expiring %d record(s) for %s: %s ms has passed since batch creation" % (
|
||||
expired_batch.record_count, expired_batch.topic_partition,
|
||||
int((time.time() - expired_batch.created) * 1000)))
|
||||
self._fail_batch(expired_batch, error, base_offset=-1)
|
||||
|
||||
if self._sensors:
|
||||
self._sensors.update_produce_request_metrics(batches_by_node)
|
||||
|
||||
if needs_transaction_state_reset:
|
||||
self._transaction_manager.reset_producer_id()
|
||||
return 0
|
||||
self._sensors.record_errors(expired_batch.topic_partition.topic, expired_batch.record_count)
|
||||
|
||||
self._sensors.update_produce_request_metrics(batches_by_node)
|
||||
requests = self._create_produce_requests(batches_by_node)
|
||||
# If we have any nodes that are ready to send + have sendable data,
|
||||
# poll with 0 timeout so this can immediately loop and try sending more
|
||||
# data. Otherwise, the timeout will be the smaller value between next
|
||||
# batch expiry time, and the delay time for checking data availability.
|
||||
# Note that the nodes may have data that isn't yet sendable due to
|
||||
# lingering, backing off, etc. This specifically does not include nodes with
|
||||
# data. Otherwise, the timeout is determined by nodes that have
|
||||
# partitions with data that isn't yet sendable (e.g. lingering, backing
|
||||
# off). Note that this specifically does not include nodes with
|
||||
# sendable data that aren't ready to send since they would cause busy
|
||||
# looping.
|
||||
poll_timeout_ms = min(next_ready_check_delay * 1000,
|
||||
not_ready_timeout_ms,
|
||||
self._accumulator.next_expiry_time_ms - now * 1000)
|
||||
if poll_timeout_ms < 0:
|
||||
poll_timeout_ms = 0
|
||||
|
||||
poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout)
|
||||
if ready_nodes:
|
||||
log.debug("%s: Nodes with data ready to send: %s", str(self), ready_nodes) # trace
|
||||
log.debug("%s: Created %d produce requests: %s", str(self), len(requests), requests) # trace
|
||||
# if some partitions are already ready to be sent, the select time
|
||||
# would be 0; otherwise if some partition already has some data
|
||||
# accumulated but not ready yet, the select time will be the time
|
||||
# difference between now and its linger expiry time; otherwise the
|
||||
# select time will be the time difference between now and the
|
||||
# metadata expiry time
|
||||
log.debug("Nodes with data ready to send: %s", ready_nodes) # trace
|
||||
log.debug("Created %d produce requests: %s", len(requests), requests) # trace
|
||||
poll_timeout_ms = 0
|
||||
|
||||
for node_id, request in six.iteritems(requests):
|
||||
batches = batches_by_node[node_id]
|
||||
log.debug('%s: Sending Produce Request: %r', str(self), request)
|
||||
log.debug('Sending Produce Request: %r', request)
|
||||
(self._client.send(node_id, request, wakeup=False)
|
||||
.add_callback(
|
||||
self._handle_produce_response, node_id, time.time(), batches)
|
||||
.add_errback(
|
||||
self._failed_produce, batches, node_id))
|
||||
return poll_timeout_ms
|
||||
|
||||
def _maybe_send_transactional_request(self):
|
||||
if self._transaction_manager.is_completing() and self._accumulator.has_incomplete:
|
||||
if self._transaction_manager.is_aborting():
|
||||
self._accumulator.abort_undrained_batches(Errors.KafkaError("Failing batch since transaction was aborted"))
|
||||
# There may still be requests left which are being retried. Since we do not know whether they had
|
||||
# been successfully appended to the broker log, we must resend them until their final status is clear.
|
||||
# If they had been appended and we did not receive the error, then our sequence number would no longer
|
||||
# be correct which would lead to an OutOfSequenceNumberError.
|
||||
if not self._accumulator.flush_in_progress():
|
||||
self._accumulator.begin_flush()
|
||||
|
||||
next_request_handler = self._transaction_manager.next_request_handler(self._accumulator.has_incomplete)
|
||||
if next_request_handler is None:
|
||||
return False
|
||||
|
||||
log.debug("%s: Sending transactional request %s", str(self), next_request_handler.request)
|
||||
while not self._force_close:
|
||||
target_node = None
|
||||
try:
|
||||
if next_request_handler.needs_coordinator():
|
||||
target_node = self._transaction_manager.coordinator(next_request_handler.coordinator_type)
|
||||
if target_node is None:
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
break
|
||||
elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
target_node = None
|
||||
break
|
||||
else:
|
||||
target_node = self._client.least_loaded_node()
|
||||
if target_node is not None and not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
|
||||
target_node = None
|
||||
|
||||
if target_node is not None:
|
||||
if next_request_handler.is_retry:
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
txn_correlation_id = self._transaction_manager.next_in_flight_request_correlation_id()
|
||||
future = self._client.send(target_node, next_request_handler.request)
|
||||
future.add_both(next_request_handler.on_complete, txn_correlation_id)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log.warning("%s: Got an exception when trying to find a node to send a transactional request to. Going to back off and retry: %s", str(self), e)
|
||||
if next_request_handler.needs_coordinator():
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
break
|
||||
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
self._metadata.request_update()
|
||||
|
||||
if target_node is None:
|
||||
self._transaction_manager.retry(next_request_handler)
|
||||
|
||||
return True
|
||||
|
||||
def _maybe_abort_batches(self, exc):
|
||||
if self._accumulator.has_incomplete:
|
||||
log.error("%s: Aborting producer batches due to fatal error: %s", str(self), exc)
|
||||
self._accumulator.abort_batches(exc)
|
||||
# if some partitions are already ready to be sent, the select time
|
||||
# would be 0; otherwise if some partition already has some data
|
||||
# accumulated but not ready yet, the select time will be the time
|
||||
# difference between now and its linger expiry time; otherwise the
|
||||
# select time will be the time difference between now and the
|
||||
# metadata expiry time
|
||||
self._client.poll(timeout_ms=poll_timeout_ms)
|
||||
|
||||
def initiate_close(self):
|
||||
"""Start closing the sender (won't complete until all data is sent)."""
|
||||
@@ -355,164 +180,82 @@ class Sender(threading.Thread):
|
||||
self._topics_to_add.add(topic)
|
||||
self.wakeup()
|
||||
|
||||
def _maybe_wait_for_producer_id(self):
|
||||
while not self._transaction_manager.has_producer_id():
|
||||
try:
|
||||
node_id = self._client.least_loaded_node()
|
||||
if node_id is None or not self._client.await_ready(node_id):
|
||||
log.debug("%s, Could not find an available broker to send InitProducerIdRequest to." +
|
||||
" Will back off and try again.", str(self))
|
||||
time.sleep(self._client.least_loaded_node_refresh_ms() / 1000)
|
||||
continue
|
||||
version = self._client.api_version(InitProducerIdRequest, max_version=1)
|
||||
request = InitProducerIdRequest[version](
|
||||
transactional_id=self.config['transactional_id'],
|
||||
transaction_timeout_ms=self.config['transaction_timeout_ms'],
|
||||
)
|
||||
response = self._client.send_and_receive(node_id, request)
|
||||
error_type = Errors.for_code(response.error_code)
|
||||
if error_type is Errors.NoError:
|
||||
self._transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch))
|
||||
break
|
||||
elif getattr(error_type, 'retriable', False):
|
||||
log.debug("%s: Retriable error from InitProducerId response: %s", str(self), error_type.__name__)
|
||||
if getattr(error_type, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
else:
|
||||
self._transaction_manager.transition_to_fatal_error(error_type())
|
||||
break
|
||||
except Errors.KafkaConnectionError:
|
||||
log.debug("%s: Broker %s disconnected while awaiting InitProducerId response", str(self), node_id)
|
||||
except Errors.RequestTimedOutError:
|
||||
log.debug("%s: InitProducerId request to node %s timed out", str(self), node_id)
|
||||
log.debug("%s: Retry InitProducerIdRequest in %sms.", str(self), self.config['retry_backoff_ms'])
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
|
||||
def _failed_produce(self, batches, node_id, error):
|
||||
log.error("%s: Error sending produce request to node %d: %s", str(self), node_id, error) # trace
|
||||
log.debug("Error sending produce request to node %d: %s", node_id, error) # trace
|
||||
for batch in batches:
|
||||
self._complete_batch(batch, error, -1)
|
||||
self._complete_batch(batch, error, -1, None)
|
||||
|
||||
def _handle_produce_response(self, node_id, send_time, batches, response):
|
||||
"""Handle a produce response."""
|
||||
# if we have a response, parse it
|
||||
log.debug('%s: Parsing produce response: %r', str(self), response)
|
||||
log.debug('Parsing produce response: %r', response)
|
||||
if response:
|
||||
batches_by_partition = dict([(batch.topic_partition, batch)
|
||||
for batch in batches])
|
||||
|
||||
for topic, partitions in response.topics:
|
||||
for partition_info in partitions:
|
||||
global_error = None
|
||||
log_start_offset = None
|
||||
if response.API_VERSION < 2:
|
||||
partition, error_code, offset = partition_info
|
||||
ts = None
|
||||
elif 2 <= response.API_VERSION <= 4:
|
||||
partition, error_code, offset, ts = partition_info
|
||||
elif 5 <= response.API_VERSION <= 7:
|
||||
partition, error_code, offset, ts, _log_start_offset = partition_info
|
||||
partition, error_code, offset, ts, log_start_offset = partition_info
|
||||
else:
|
||||
# Currently unused / TODO: KIP-467
|
||||
partition, error_code, offset, ts, _log_start_offset, _record_errors, _global_error = partition_info
|
||||
# the ignored parameter is record_error of type list[(batch_index: int, error_message: str)]
|
||||
partition, error_code, offset, ts, log_start_offset, _, global_error = partition_info
|
||||
tp = TopicPartition(topic, partition)
|
||||
error = Errors.for_code(error_code)
|
||||
batch = batches_by_partition[tp]
|
||||
self._complete_batch(batch, error, offset, timestamp_ms=ts)
|
||||
self._complete_batch(batch, error, offset, ts, log_start_offset, global_error)
|
||||
|
||||
if response.API_VERSION > 0:
|
||||
self._sensors.record_throttle_time(response.throttle_time_ms, node=node_id)
|
||||
|
||||
else:
|
||||
# this is the acks = 0 case, just complete all requests
|
||||
for batch in batches:
|
||||
self._complete_batch(batch, None, -1)
|
||||
self._complete_batch(batch, None, -1, None)
|
||||
|
||||
def _fail_batch(self, batch, exception, base_offset=None, timestamp_ms=None):
|
||||
exception = exception if type(exception) is not type else exception()
|
||||
if self._transaction_manager:
|
||||
if isinstance(exception, Errors.OutOfOrderSequenceNumberError) and \
|
||||
not self._transaction_manager.is_transactional() and \
|
||||
self._transaction_manager.has_producer_id(batch.producer_id):
|
||||
log.error("%s: The broker received an out of order sequence number for topic-partition %s"
|
||||
" at offset %s. This indicates data loss on the broker, and should be investigated.",
|
||||
str(self), batch.topic_partition, base_offset)
|
||||
|
||||
# Reset the transaction state since we have hit an irrecoverable exception and cannot make any guarantees
|
||||
# about the previously committed message. Note that this will discard the producer id and sequence
|
||||
# numbers for all existing partitions.
|
||||
self._transaction_manager.reset_producer_id()
|
||||
elif isinstance(exception, (Errors.ClusterAuthorizationFailedError,
|
||||
Errors.TransactionalIdAuthorizationFailedError,
|
||||
Errors.ProducerFencedError,
|
||||
Errors.InvalidTxnStateError)):
|
||||
self._transaction_manager.transition_to_fatal_error(exception)
|
||||
elif self._transaction_manager.is_transactional():
|
||||
self._transaction_manager.transition_to_abortable_error(exception)
|
||||
|
||||
if self._sensors:
|
||||
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
|
||||
|
||||
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms, exception=exception):
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
self._accumulator.deallocate(batch)
|
||||
|
||||
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None):
|
||||
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None, log_start_offset=None, global_error=None):
|
||||
"""Complete or retry the given batch of records.
|
||||
|
||||
Arguments:
|
||||
batch (ProducerBatch): The record batch
|
||||
batch (RecordBatch): The record batch
|
||||
error (Exception): The error (or None if none)
|
||||
base_offset (int): The base offset assigned to the records if successful
|
||||
timestamp_ms (int, optional): The timestamp returned by the broker for this batch
|
||||
log_start_offset (int): The start offset of the log at the time this produce response was created
|
||||
global_error (str): The summarising error message
|
||||
"""
|
||||
# Standardize no-error to None
|
||||
if error is Errors.NoError:
|
||||
error = None
|
||||
|
||||
if error is not None:
|
||||
if self._can_retry(batch, error):
|
||||
# retry
|
||||
log.warning("%s: Got error produce response on topic-partition %s,"
|
||||
" retrying (%s attempts left). Error: %s",
|
||||
str(self), batch.topic_partition,
|
||||
self.config['retries'] - batch.attempts - 1,
|
||||
error)
|
||||
|
||||
# If idempotence is enabled only retry the request if the batch matches our current producer id and epoch
|
||||
if not self._transaction_manager or self._transaction_manager.producer_id_and_epoch.match(batch):
|
||||
log.debug("%s: Retrying batch to topic-partition %s. Sequence number: %s",
|
||||
str(self), batch.topic_partition,
|
||||
self._transaction_manager.sequence_number(batch.topic_partition) if self._transaction_manager else None)
|
||||
self._accumulator.reenqueue(batch)
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
if self._sensors:
|
||||
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
|
||||
else:
|
||||
log.warning("%s: Attempted to retry sending a batch but the producer id/epoch changed from %s/%s to %s/%s. This batch will be dropped",
|
||||
str(self), batch.producer_id, batch.producer_epoch,
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch)
|
||||
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
|
||||
else:
|
||||
if error is Errors.TopicAuthorizationFailedError:
|
||||
error = error(batch.topic_partition.topic)
|
||||
|
||||
# tell the user the result of their request
|
||||
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
|
||||
|
||||
if error is Errors.UnknownTopicOrPartitionError:
|
||||
log.warning("%s: Received unknown topic or partition error in produce request on partition %s."
|
||||
" The topic/partition may not exist or the user may not have Describe access to it",
|
||||
str(self), batch.topic_partition)
|
||||
|
||||
if getattr(error, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
|
||||
if error is not None and self._can_retry(batch, error):
|
||||
# retry
|
||||
log.warning("Got error produce response on topic-partition %s,"
|
||||
" retrying (%d attempts left). Error: %s",
|
||||
batch.topic_partition,
|
||||
self.config['retries'] - batch.attempts - 1,
|
||||
global_error or error)
|
||||
self._accumulator.reenqueue(batch)
|
||||
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
|
||||
else:
|
||||
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms):
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
self._accumulator.deallocate(batch)
|
||||
if error is Errors.TopicAuthorizationFailedError:
|
||||
error = error(batch.topic_partition.topic)
|
||||
|
||||
if self._transaction_manager and self._transaction_manager.producer_id_and_epoch.match(batch):
|
||||
self._transaction_manager.increment_sequence_number(batch.topic_partition, batch.record_count)
|
||||
log.debug("%s: Incremented sequence number for topic-partition %s to %s", str(self), batch.topic_partition,
|
||||
self._transaction_manager.sequence_number(batch.topic_partition))
|
||||
# tell the user the result of their request
|
||||
batch.done(base_offset, timestamp_ms, error, log_start_offset, global_error)
|
||||
self._accumulator.deallocate(batch)
|
||||
if error is not None:
|
||||
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
|
||||
|
||||
if getattr(error, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
|
||||
# Unmute the completed partition.
|
||||
if self.config['guarantee_message_order']:
|
||||
@@ -523,10 +266,8 @@ class Sender(threading.Thread):
|
||||
We can retry a send if the error is transient and the number of
|
||||
attempts taken is fewer than the maximum allowed
|
||||
"""
|
||||
return (not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms) and
|
||||
batch.attempts < self.config['retries'] and
|
||||
batch.final_state is None and
|
||||
getattr(error, 'retriable', False))
|
||||
return (batch.attempts < self.config['retries']
|
||||
and getattr(error, 'retriable', False))
|
||||
|
||||
def _create_produce_requests(self, collated):
|
||||
"""
|
||||
@@ -534,24 +275,23 @@ class Sender(threading.Thread):
|
||||
per-node basis.
|
||||
|
||||
Arguments:
|
||||
collated: {node_id: [ProducerBatch]}
|
||||
collated: {node_id: [RecordBatch]}
|
||||
|
||||
Returns:
|
||||
dict: {node_id: ProduceRequest} (version depends on client api_versions)
|
||||
dict: {node_id: ProduceRequest} (version depends on api_version)
|
||||
"""
|
||||
requests = {}
|
||||
for node_id, batches in six.iteritems(collated):
|
||||
if batches:
|
||||
requests[node_id] = self._produce_request(
|
||||
node_id, self.config['acks'],
|
||||
self.config['request_timeout_ms'], batches)
|
||||
requests[node_id] = self._produce_request(
|
||||
node_id, self.config['acks'],
|
||||
self.config['request_timeout_ms'], batches)
|
||||
return requests
|
||||
|
||||
def _produce_request(self, node_id, acks, timeout, batches):
|
||||
"""Create a produce request from the given record batches.
|
||||
|
||||
Returns:
|
||||
ProduceRequest (version depends on client api_versions)
|
||||
ProduceRequest (version depends on api_version)
|
||||
"""
|
||||
produce_records_by_partition = collections.defaultdict(dict)
|
||||
for batch in batches:
|
||||
@@ -561,26 +301,32 @@ class Sender(threading.Thread):
|
||||
buf = batch.records.buffer()
|
||||
produce_records_by_partition[topic][partition] = buf
|
||||
|
||||
version = self._client.api_version(ProduceRequest, max_version=7)
|
||||
topic_partition_data = [
|
||||
(topic, list(partition_info.items()))
|
||||
for topic, partition_info in six.iteritems(produce_records_by_partition)]
|
||||
transactional_id = self._transaction_manager.transactional_id if self._transaction_manager else None
|
||||
if version >= 3:
|
||||
return ProduceRequest[version](
|
||||
transactional_id=transactional_id,
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=topic_partition_data,
|
||||
)
|
||||
kwargs = {}
|
||||
if self.config['api_version'] >= (2, 1):
|
||||
version = 7
|
||||
elif self.config['api_version'] >= (2, 0):
|
||||
version = 6
|
||||
elif self.config['api_version'] >= (1, 1):
|
||||
version = 5
|
||||
elif self.config['api_version'] >= (1, 0):
|
||||
version = 4
|
||||
elif self.config['api_version'] >= (0, 11):
|
||||
version = 3
|
||||
kwargs = dict(transactional_id=None)
|
||||
elif self.config['api_version'] >= (0, 10):
|
||||
version = 2
|
||||
elif self.config['api_version'] == (0, 9):
|
||||
version = 1
|
||||
else:
|
||||
if transactional_id is not None:
|
||||
log.warning('%s: Broker does not support ProduceRequest v3+, required for transactional_id', str(self))
|
||||
return ProduceRequest[version](
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=topic_partition_data,
|
||||
)
|
||||
version = 0
|
||||
return ProduceRequest[version](
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=[(topic, list(partition_info.items()))
|
||||
for topic, partition_info
|
||||
in six.iteritems(produce_records_by_partition)],
|
||||
**kwargs
|
||||
)
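# The api_version-based branches above, captured as a small lookup helper for reference. This
# mirrors only the config-driven fallback path shown here; the helper name is illustrative and
# not part of the library:
def produce_request_version(api_version):
    if api_version >= (2, 1):
        return 7
    if api_version >= (2, 0):
        return 6
    if api_version >= (1, 1):
        return 5
    if api_version >= (1, 0):
        return 4
    if api_version >= (0, 11):
        return 3
    if api_version >= (0, 10):
        return 2
    if api_version == (0, 9):
        return 1
    return 0

assert produce_request_version((2, 3)) == 7
assert produce_request_version((0, 8, 0)) == 0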
|
||||
def wakeup(self):
|
||||
"""Wake up the selector associated with this send thread."""
|
||||
@@ -589,9 +335,6 @@ class Sender(threading.Thread):
|
||||
def bootstrap_connected(self):
|
||||
return self._client.bootstrap_connected()
|
||||
|
||||
def __str__(self):
|
||||
return "<Sender client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id'])
|
||||
|
||||
|
||||
class SenderMetrics(object):
|
||||
|
||||
@@ -624,6 +367,15 @@ class SenderMetrics(object):
|
||||
sensor_name=sensor_name,
|
||||
description='The maximum time in ms record batches spent in the record accumulator.')
|
||||
|
||||
sensor_name = 'produce-throttle-time'
|
||||
self.produce_throttle_time_sensor = self.metrics.sensor(sensor_name)
|
||||
self.add_metric('produce-throttle-time-avg', Avg(),
|
||||
sensor_name=sensor_name,
|
||||
description='The average throttle time in ms')
|
||||
self.add_metric('produce-throttle-time-max', Max(),
|
||||
sensor_name=sensor_name,
|
||||
description='The maximum throttle time in ms')
|
||||
|
||||
sensor_name = 'records-per-request'
|
||||
self.records_per_request_sensor = self.metrics.sensor(sensor_name)
|
||||
self.add_metric('record-send-rate', Rate(),
|
||||
@@ -746,9 +498,8 @@ class SenderMetrics(object):
|
||||
records += batch.record_count
|
||||
total_bytes += batch.records.size_in_bytes()
|
||||
|
||||
if node_batch:
|
||||
self.records_per_request_sensor.record(records)
|
||||
self.byte_rate_sensor.record(total_bytes)
|
||||
self.records_per_request_sensor.record(records)
|
||||
self.byte_rate_sensor.record(total_bytes)
|
||||
|
||||
def record_retries(self, topic, count):
|
||||
self.retry_sensor.record(count)
|
||||
@@ -761,3 +512,6 @@ class SenderMetrics(object):
|
||||
sensor = self.metrics.get_sensor('topic.' + topic + '.record-errors')
|
||||
if sensor:
|
||||
sensor.record(count)
|
||||
|
||||
def record_throttle_time(self, throttle_time_ms, node=None):
|
||||
self.produce_throttle_time_sensor.record(throttle_time_ms)
|
||||
|
||||
@@ -1,981 +0,0 @@
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
import abc
|
||||
import collections
|
||||
import heapq
|
||||
import logging
|
||||
import threading
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
try:
|
||||
# enum in stdlib as of py3.4
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.protocol.add_offsets_to_txn import AddOffsetsToTxnRequest
|
||||
from kafka.protocol.add_partitions_to_txn import AddPartitionsToTxnRequest
|
||||
from kafka.protocol.end_txn import EndTxnRequest
|
||||
from kafka.protocol.find_coordinator import FindCoordinatorRequest
|
||||
from kafka.protocol.init_producer_id import InitProducerIdRequest
|
||||
from kafka.protocol.txn_offset_commit import TxnOffsetCommitRequest
|
||||
from kafka.structs import TopicPartition
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
NO_PRODUCER_ID = -1
|
||||
NO_PRODUCER_EPOCH = -1
|
||||
NO_SEQUENCE = -1
|
||||
|
||||
|
||||
class ProducerIdAndEpoch(object):
|
||||
__slots__ = ('producer_id', 'epoch')
|
||||
|
||||
def __init__(self, producer_id, epoch):
|
||||
self.producer_id = producer_id
|
||||
self.epoch = epoch
|
||||
|
||||
@property
|
||||
def is_valid(self):
|
||||
return NO_PRODUCER_ID < self.producer_id
|
||||
|
||||
def match(self, batch):
|
||||
return self.producer_id == batch.producer_id and self.epoch == batch.producer_epoch
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, ProducerIdAndEpoch) and self.producer_id == other.producer_id and self.epoch == other.epoch
|
||||
|
||||
def __str__(self):
|
||||
return "ProducerIdAndEpoch(producer_id={}, epoch={})".format(self.producer_id, self.epoch)
|
||||
|
||||
|
||||
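# Illustrative sketch (not part of the original module): the sentinel pair above is
# reported as invalid until a real producer id is assigned via InitProducerId. The
# numeric values 4000/1 below are made-up example values.
def _example_producer_id_and_epoch():
    unassigned = ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)
    assert not unassigned.is_valid
    assigned = ProducerIdAndEpoch(4000, 1)
    assert assigned.is_valid and assigned == ProducerIdAndEpoch(4000, 1)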
class TransactionState(IntEnum):
|
||||
UNINITIALIZED = 0
|
||||
INITIALIZING = 1
|
||||
READY = 2
|
||||
IN_TRANSACTION = 3
|
||||
COMMITTING_TRANSACTION = 4
|
||||
ABORTING_TRANSACTION = 5
|
||||
ABORTABLE_ERROR = 6
|
||||
FATAL_ERROR = 7
|
||||
|
||||
@classmethod
|
||||
def is_transition_valid(cls, source, target):
|
||||
if target == cls.INITIALIZING:
|
||||
return source == cls.UNINITIALIZED
|
||||
elif target == cls.READY:
|
||||
return source in (cls.INITIALIZING, cls.COMMITTING_TRANSACTION, cls.ABORTING_TRANSACTION)
|
||||
elif target == cls.IN_TRANSACTION:
|
||||
return source == cls.READY
|
||||
elif target == cls.COMMITTING_TRANSACTION:
|
||||
return source == cls.IN_TRANSACTION
|
||||
elif target == cls.ABORTING_TRANSACTION:
|
||||
return source in (cls.IN_TRANSACTION, cls.ABORTABLE_ERROR)
|
||||
elif target == cls.ABORTABLE_ERROR:
|
||||
return source in (cls.IN_TRANSACTION, cls.COMMITTING_TRANSACTION, cls.ABORTABLE_ERROR)
|
||||
elif target == cls.UNINITIALIZED:
|
||||
# Disallow transitions to UNINITIALIZED
|
||||
return False
|
||||
elif target == cls.FATAL_ERROR:
|
||||
# We can transition to FATAL_ERROR unconditionally.
|
||||
# FATAL_ERROR is never a valid starting state for any transition. So the only option is to close the
|
||||
# producer or do purely non transactional requests.
|
||||
return True
|
||||
|
||||
|
||||
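# A quick illustration of the transition rules above (sketch, not in the original
# source); is_transition_valid is a pure predicate, so it can be exercised directly:
def _example_transaction_state_transitions():
    assert TransactionState.is_transition_valid(
        TransactionState.UNINITIALIZED, TransactionState.INITIALIZING)
    assert TransactionState.is_transition_valid(
        TransactionState.IN_TRANSACTION, TransactionState.COMMITTING_TRANSACTION)
    # Committing straight from READY (without begin_transaction) is rejected.
    assert not TransactionState.is_transition_valid(
        TransactionState.READY, TransactionState.COMMITTING_TRANSACTION)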
class Priority(IntEnum):
|
||||
# We use the priority to determine the order in which requests need to be sent out. For instance, if we have
|
||||
# a pending FindCoordinator request, that must always go first. Next, if we need a producer id, that must go second.
|
||||
# The endTxn request must always go last.
|
||||
FIND_COORDINATOR = 0
|
||||
INIT_PRODUCER_ID = 1
|
||||
ADD_PARTITIONS_OR_OFFSETS = 2
|
||||
END_TXN = 3
|
||||
|
||||
|
||||
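# Sketch of how these priorities are consumed (an assumption based on the heapq
# usage further below, not part of the original source): pending requests are pushed
# as (priority, sort_id, handler) tuples, so lower Priority values pop first and the
# monotonically increasing sort_id breaks ties in FIFO order.
def _example_priority_ordering():
    queue = []
    heapq.heappush(queue, (Priority.END_TXN, 2, 'end_txn'))
    heapq.heappush(queue, (Priority.ADD_PARTITIONS_OR_OFFSETS, 3, 'add_partitions'))
    heapq.heappush(queue, (Priority.FIND_COORDINATOR, 4, 'find_coordinator'))
    order = [heapq.heappop(queue)[2] for _ in range(3)]
    assert order == ['find_coordinator', 'add_partitions', 'end_txn']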
class TransactionManager(object):
|
||||
"""
|
||||
A class which maintains state for transactions. Also keeps the state necessary to ensure idempotent production.
|
||||
"""
|
||||
NO_INFLIGHT_REQUEST_CORRELATION_ID = -1
|
||||
# The retry_backoff_ms is overridden to the following value if the first AddPartitions receives a
|
||||
# CONCURRENT_TRANSACTIONS error.
|
||||
ADD_PARTITIONS_RETRY_BACKOFF_MS = 20
|
||||
|
||||
def __init__(self, transactional_id=None, transaction_timeout_ms=0, retry_backoff_ms=100, api_version=(0, 11), metadata=None):
|
||||
self._api_version = api_version
|
||||
self._metadata = metadata
|
||||
|
||||
self._sequence_numbers = collections.defaultdict(lambda: 0)
|
||||
|
||||
self.transactional_id = transactional_id
|
||||
self.transaction_timeout_ms = transaction_timeout_ms
|
||||
self._transaction_coordinator = None
|
||||
self._consumer_group_coordinator = None
|
||||
self._new_partitions_in_transaction = set()
|
||||
self._pending_partitions_in_transaction = set()
|
||||
self._partitions_in_transaction = set()
|
||||
self._pending_txn_offset_commits = dict()
|
||||
|
||||
self._current_state = TransactionState.UNINITIALIZED
|
||||
self._last_error = None
|
||||
self.producer_id_and_epoch = ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)
|
||||
|
||||
self._transaction_started = False
|
||||
|
||||
self._pending_requests = [] # priority queue via heapq
|
||||
self._pending_requests_sort_id = 0
|
||||
self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
# This is used by the TxnRequestHandlers to control how long to back off before a given request is retried.
|
||||
# For instance, this value is lowered by the AddPartitionsToTxnHandler when it receives a CONCURRENT_TRANSACTIONS
|
||||
# error for the first AddPartitionsRequest in a transaction.
|
||||
self.retry_backoff_ms = retry_backoff_ms
|
||||
self._lock = threading.Condition()
|
||||
|
||||
def initialize_transactions(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._transition_to(TransactionState.INITIALIZING)
|
||||
self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH))
|
||||
self._sequence_numbers.clear()
|
||||
handler = InitProducerIdHandler(self, self.transaction_timeout_ms)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
|
||||
|
||||
def begin_transaction(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.IN_TRANSACTION)
|
||||
|
||||
def begin_commit(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.COMMITTING_TRANSACTION)
|
||||
return self._begin_completing_transaction(True)
|
||||
|
||||
def begin_abort(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
if self._current_state != TransactionState.ABORTABLE_ERROR:
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.ABORTING_TRANSACTION)
|
||||
|
||||
# We're aborting the transaction, so there should be no need to add new partitions
|
||||
self._new_partitions_in_transaction.clear()
|
||||
return self._begin_completing_transaction(False)
|
||||
|
||||
def _begin_completing_transaction(self, committed):
|
||||
if self._new_partitions_in_transaction:
|
||||
self._enqueue_request(self._add_partitions_to_transaction_handler())
|
||||
handler = EndTxnHandler(self, committed)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
|
||||
|
||||
def send_offsets_to_transaction(self, offsets, consumer_group_id):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
if self._current_state != TransactionState.IN_TRANSACTION:
|
||||
raise Errors.KafkaError("Cannot send offsets to transaction because the producer is not in an active transaction")
|
||||
|
||||
log.debug("Begin adding offsets %s for consumer group %s to transaction", offsets, consumer_group_id)
|
||||
handler = AddOffsetsToTxnHandler(self, consumer_group_id, offsets)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
|
||||
|
||||
def maybe_add_partition_to_transaction(self, topic_partition):
|
||||
with self._lock:
|
||||
self._fail_if_not_ready_for_send()
|
||||
|
||||
if self.is_partition_added(topic_partition) or self.is_partition_pending_add(topic_partition):
|
||||
return
|
||||
|
||||
log.debug("Begin adding new partition %s to transaction", topic_partition)
|
||||
self._new_partitions_in_transaction.add(topic_partition)
|
||||
|
||||
def _fail_if_not_ready_for_send(self):
|
||||
with self._lock:
|
||||
if self.has_error():
|
||||
raise Errors.KafkaError(
|
||||
"Cannot perform send because at least one previous transactional or"
|
||||
" idempotent request has failed with errors.", self._last_error)
|
||||
|
||||
if self.is_transactional():
|
||||
if not self.has_producer_id():
|
||||
raise Errors.IllegalStateError(
|
||||
"Cannot perform a 'send' before completing a call to init_transactions"
|
||||
" when transactions are enabled.")
|
||||
|
||||
if self._current_state != TransactionState.IN_TRANSACTION:
|
||||
raise Errors.IllegalStateError("Cannot call send in state %s" % (self._current_state.name,))
|
||||
|
||||
def is_send_to_partition_allowed(self, tp):
|
||||
with self._lock:
|
||||
if self.has_fatal_error():
|
||||
return False
|
||||
return not self.is_transactional() or tp in self._partitions_in_transaction
|
||||
|
||||
def has_producer_id(self, producer_id=None):
|
||||
if producer_id is None:
|
||||
return self.producer_id_and_epoch.is_valid
|
||||
else:
|
||||
return self.producer_id_and_epoch.producer_id == producer_id
|
||||
|
||||
def is_transactional(self):
|
||||
return self.transactional_id is not None
|
||||
|
||||
def has_partitions_to_add(self):
|
||||
with self._lock:
|
||||
return bool(self._new_partitions_in_transaction) or bool(self._pending_partitions_in_transaction)
|
||||
|
||||
def is_completing(self):
|
||||
with self._lock:
|
||||
return self._current_state in (
|
||||
TransactionState.COMMITTING_TRANSACTION,
|
||||
TransactionState.ABORTING_TRANSACTION)
|
||||
|
||||
@property
|
||||
def last_error(self):
|
||||
return self._last_error
|
||||
|
||||
def has_error(self):
|
||||
with self._lock:
|
||||
return self._current_state in (
|
||||
TransactionState.ABORTABLE_ERROR,
|
||||
TransactionState.FATAL_ERROR)
|
||||
|
||||
def is_aborting(self):
|
||||
with self._lock:
|
||||
return self._current_state == TransactionState.ABORTING_TRANSACTION
|
||||
|
||||
def transition_to_abortable_error(self, exc):
|
||||
with self._lock:
|
||||
if self._current_state == TransactionState.ABORTING_TRANSACTION:
|
||||
log.debug("Skipping transition to abortable error state since the transaction is already being "
|
||||
" aborted. Underlying exception: %s", exc)
|
||||
return
|
||||
self._transition_to(TransactionState.ABORTABLE_ERROR, error=exc)
|
||||
|
||||
def transition_to_fatal_error(self, exc):
|
||||
with self._lock:
|
||||
self._transition_to(TransactionState.FATAL_ERROR, error=exc)
|
||||
|
||||
def is_partition_added(self, partition):
|
||||
with self._lock:
|
||||
return partition in self._partitions_in_transaction
|
||||
|
||||
def is_partition_pending_add(self, partition):
|
||||
return partition in self._new_partitions_in_transaction or partition in self._pending_partitions_in_transaction
|
||||
|
||||
def has_producer_id_and_epoch(self, producer_id, producer_epoch):
|
||||
return (
|
||||
self.producer_id_and_epoch.producer_id == producer_id and
|
||||
self.producer_id_and_epoch.epoch == producer_epoch
|
||||
)
|
||||
|
||||
def set_producer_id_and_epoch(self, producer_id_and_epoch):
|
||||
if not isinstance(producer_id_and_epoch, ProducerIdAndEpoch):
|
||||
raise TypeError("ProducerAndIdEpoch type required")
|
||||
log.info("ProducerId set to %s with epoch %s",
|
||||
producer_id_and_epoch.producer_id, producer_id_and_epoch.epoch)
|
||||
self.producer_id_and_epoch = producer_id_and_epoch
|
||||
|
||||
def reset_producer_id(self):
|
||||
"""
|
||||
This method is used when the producer needs to reset its internal state because of an irrecoverable exception
|
||||
from the broker.
|
||||
|
||||
We need to reset the producer id and associated state when we have sent a batch to the broker, but we either get
|
||||
a non-retriable exception or we run out of retries, or the batch expired in the producer queue after it was already
|
||||
sent to the broker.
|
||||
|
||||
In all of these cases, we don't know whether the batch was actually committed on the broker, and hence whether the
|
||||
sequence number was actually updated. If we don't reset the producer state, we risk the chance that all future
|
||||
messages will return an OutOfOrderSequenceNumberError.
|
||||
|
||||
Note that we can't reset the producer state for the transactional producer as this would mean bumping the epoch
|
||||
for the same producer id. This might involve aborting the ongoing transaction during the initProducerIdRequest,
|
||||
and the user would not have any way of knowing this happened. So for the transactional producer,
|
||||
it's best to return the produce error to the user and let them abort the transaction and close the producer explicitly.
|
||||
"""
|
||||
with self._lock:
|
||||
if self.is_transactional():
|
||||
raise Errors.IllegalStateError(
|
||||
"Cannot reset producer state for a transactional producer."
|
||||
" You must either abort the ongoing transaction or"
|
||||
" reinitialize the transactional producer instead")
|
||||
self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH))
|
||||
self._sequence_numbers.clear()
|
||||
|
||||
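    # Illustrative behaviour of the guard above (sketch, not part of the original
    # source): an idempotent-only manager (transactional_id=None) may reset its
    # producer state, while a transactional one raises IllegalStateError instead.
    #
    #     >>> tm = TransactionManager()                       # transactional_id=None
    #     >>> tm.set_producer_id_and_epoch(ProducerIdAndEpoch(4000, 1))
    #     >>> tm.reset_producer_id()
    #     >>> tm.producer_id_and_epoch.is_valid
    #     False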
def sequence_number(self, tp):
|
||||
with self._lock:
|
||||
return self._sequence_numbers[tp]
|
||||
|
||||
def increment_sequence_number(self, tp, increment):
|
||||
with self._lock:
|
||||
if tp not in self._sequence_numbers:
|
||||
raise Errors.IllegalStateError("Attempt to increment sequence number for a partition with no current sequence.")
|
||||
# Sequence number wraps at java max int
|
||||
base = self._sequence_numbers[tp]
|
||||
if base > (2147483647 - increment):
|
||||
self._sequence_numbers[tp] = increment - (2147483647 - base) - 1
|
||||
else:
|
||||
self._sequence_numbers[tp] += increment
|
||||
|
||||
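    # Worked example of the wrap-around above (illustrative): with a current
    # sequence of 2147483646 and increment=5, the branch computes
    # 5 - (2147483647 - 2147483646) - 1 = 3, i.e. the five records take
    # sequences 2147483646, 2147483647, 0, 1, 2 and the next sequence is 3.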
def next_request_handler(self, has_incomplete_batches):
|
||||
with self._lock:
|
||||
if self._new_partitions_in_transaction:
|
||||
self._enqueue_request(self._add_partitions_to_transaction_handler())
|
||||
|
||||
if not self._pending_requests:
|
||||
return None
|
||||
|
||||
_, _, next_request_handler = self._pending_requests[0]
|
||||
# Do not send the EndTxn until all batches have been flushed
|
||||
if isinstance(next_request_handler, EndTxnHandler) and has_incomplete_batches:
|
||||
return None
|
||||
|
||||
heapq.heappop(self._pending_requests)
|
||||
if self._maybe_terminate_request_with_error(next_request_handler):
|
||||
log.debug("Not sending transactional request %s because we are in an error state",
|
||||
next_request_handler.request)
|
||||
return None
|
||||
|
||||
if isinstance(next_request_handler, EndTxnHandler) and not self._transaction_started:
|
||||
next_request_handler.result.done()
|
||||
if self._current_state != TransactionState.FATAL_ERROR:
|
||||
log.debug("Not sending EndTxn for completed transaction since no partitions"
|
||||
" or offsets were successfully added")
|
||||
self._complete_transaction()
|
||||
try:
|
||||
_, _, next_request_handler = heapq.heappop(self._pending_requests)
|
||||
except IndexError:
|
||||
next_request_handler = None
|
||||
|
||||
if next_request_handler:
|
||||
log.debug("Request %s dequeued for sending", next_request_handler.request)
|
||||
|
||||
return next_request_handler
|
||||
|
||||
def retry(self, request):
|
||||
with self._lock:
|
||||
request.set_retry()
|
||||
self._enqueue_request(request)
|
||||
|
||||
def authentication_failed(self, exc):
|
||||
with self._lock:
|
||||
for _, _, request in self._pending_requests:
|
||||
request.fatal_error(exc)
|
||||
|
||||
def coordinator(self, coord_type):
|
||||
if coord_type == 'group':
|
||||
return self._consumer_group_coordinator
|
||||
elif coord_type == 'transaction':
|
||||
return self._transaction_coordinator
|
||||
else:
|
||||
raise Errors.IllegalStateError("Received an invalid coordinator type: %s" % (coord_type,))
|
||||
|
||||
def lookup_coordinator_for_request(self, request):
|
||||
self._lookup_coordinator(request.coordinator_type, request.coordinator_key)
|
||||
|
||||
def next_in_flight_request_correlation_id(self):
|
||||
self._in_flight_request_correlation_id += 1
|
||||
return self._in_flight_request_correlation_id
|
||||
|
||||
def clear_in_flight_transactional_request_correlation_id(self):
|
||||
self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
def has_in_flight_transactional_request(self):
|
||||
return self._in_flight_request_correlation_id != self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
def has_fatal_error(self):
|
||||
return self._current_state == TransactionState.FATAL_ERROR
|
||||
|
||||
def has_abortable_error(self):
|
||||
return self._current_state == TransactionState.ABORTABLE_ERROR
|
||||
|
||||
# visible for testing
|
||||
def _test_transaction_contains_partition(self, tp):
|
||||
with self._lock:
|
||||
return tp in self._partitions_in_transaction
|
||||
|
||||
# visible for testing
|
||||
def _test_has_pending_offset_commits(self):
|
||||
return bool(self._pending_txn_offset_commits)
|
||||
|
||||
# visible for testing
|
||||
def _test_has_ongoing_transaction(self):
|
||||
with self._lock:
|
||||
# transactions are considered ongoing once started until completion or a fatal error
|
||||
return self._current_state == TransactionState.IN_TRANSACTION or self.is_completing() or self.has_abortable_error()
|
||||
|
||||
# visible for testing
|
||||
def _test_is_ready(self):
|
||||
with self._lock:
|
||||
return self.is_transactional() and self._current_state == TransactionState.READY
|
||||
|
||||
def _transition_to(self, target, error=None):
|
||||
with self._lock:
|
||||
if not self._current_state.is_transition_valid(self._current_state, target):
|
||||
raise Errors.KafkaError("TransactionalId %s: Invalid transition attempted from state %s to state %s" % (
|
||||
self.transactional_id, self._current_state.name, target.name))
|
||||
|
||||
if target in (TransactionState.FATAL_ERROR, TransactionState.ABORTABLE_ERROR):
|
||||
if error is None:
|
||||
raise Errors.IllegalArgumentError("Cannot transition to %s with an None exception" % (target.name,))
|
||||
self._last_error = error
|
||||
else:
|
||||
self._last_error = None
|
||||
|
||||
if self._last_error is not None:
|
||||
log.debug("Transition from state %s to error state %s (%s)", self._current_state.name, target.name, self._last_error)
|
||||
else:
|
||||
log.debug("Transition from state %s to %s", self._current_state, target)
|
||||
self._current_state = target
|
||||
|
||||
def _ensure_transactional(self):
|
||||
if not self.is_transactional():
|
||||
raise Errors.IllegalStateError("Transactional method invoked on a non-transactional producer.")
|
||||
|
||||
def _maybe_fail_with_error(self):
|
||||
if self.has_error():
|
||||
raise Errors.KafkaError("Cannot execute transactional method because we are in an error state: %s" % (self._last_error,))
|
||||
|
||||
def _maybe_terminate_request_with_error(self, request_handler):
|
||||
if self.has_error():
|
||||
if self.has_abortable_error() and isinstance(request_handler, FindCoordinatorHandler):
|
||||
# No harm letting the FindCoordinator request go through if we're expecting to abort
|
||||
return False
|
||||
request_handler.fail(self._last_error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def _next_pending_requests_sort_id(self):
|
||||
self._pending_requests_sort_id += 1
|
||||
return self._pending_requests_sort_id
|
||||
|
||||
def _enqueue_request(self, request_handler):
|
||||
log.debug("Enqueuing transactional request %s", request_handler.request)
|
||||
heapq.heappush(
|
||||
self._pending_requests,
|
||||
(
|
||||
request_handler.priority, # keep lowest priority at head of queue
|
||||
self._next_pending_requests_sort_id(), # break ties
|
||||
request_handler
|
||||
)
|
||||
)
|
||||
|
||||
def _lookup_coordinator(self, coord_type, coord_key):
|
||||
with self._lock:
|
||||
if coord_type == 'group':
|
||||
self._consumer_group_coordinator = None
|
||||
elif coord_type == 'transaction':
|
||||
self._transaction_coordinator = None
|
||||
else:
|
||||
raise Errors.IllegalStateError("Invalid coordinator type: %s" % (coord_type,))
|
||||
self._enqueue_request(FindCoordinatorHandler(self, coord_type, coord_key))
|
||||
|
||||
def _complete_transaction(self):
|
||||
with self._lock:
|
||||
self._transition_to(TransactionState.READY)
|
||||
self._transaction_started = False
|
||||
self._new_partitions_in_transaction.clear()
|
||||
self._pending_partitions_in_transaction.clear()
|
||||
self._partitions_in_transaction.clear()
|
||||
|
||||
def _add_partitions_to_transaction_handler(self):
|
||||
with self._lock:
|
||||
self._pending_partitions_in_transaction.update(self._new_partitions_in_transaction)
|
||||
self._new_partitions_in_transaction.clear()
|
||||
return AddPartitionsToTxnHandler(self, self._pending_partitions_in_transaction)
|
||||
|
||||
|
||||
class TransactionalRequestResult(object):
|
||||
def __init__(self):
|
||||
self._latch = threading.Event()
|
||||
self._error = None
|
||||
|
||||
def done(self, error=None):
|
||||
self._error = error
|
||||
self._latch.set()
|
||||
|
||||
def wait(self, timeout_ms=None):
|
||||
timeout = timeout_ms / 1000 if timeout_ms is not None else None
|
||||
success = self._latch.wait(timeout)
|
||||
if self._error:
|
||||
raise self._error
|
||||
return success
|
||||
|
||||
@property
|
||||
def is_done(self):
|
||||
return self._latch.is_set()
|
||||
|
||||
@property
|
||||
def succeeded(self):
|
||||
return self._latch.is_set() and self._error is None
|
||||
|
||||
@property
|
||||
def failed(self):
|
||||
return self._latch.is_set() and self._error is not None
|
||||
|
||||
@property
|
||||
def exception(self):
|
||||
return self._error
|
||||
|
||||
|
||||
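# Minimal usage sketch (not part of the original source): a request handler calls
# done() on the result, and a caller blocked in wait() either returns normally or
# re-raises the recorded error; here we only inspect the completed states.
def _example_transactional_request_result():
    ok = TransactionalRequestResult()
    ok.done()
    assert ok.is_done and ok.succeeded and ok.wait(timeout_ms=0)

    failed = TransactionalRequestResult()
    failed.done(error=Errors.KafkaError('request failed'))
    assert failed.failed and isinstance(failed.exception, Errors.KafkaError)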
@six.add_metaclass(abc.ABCMeta)
|
||||
class TxnRequestHandler(object):
|
||||
def __init__(self, transaction_manager, result=None):
|
||||
self.transaction_manager = transaction_manager
|
||||
self.retry_backoff_ms = transaction_manager.retry_backoff_ms
|
||||
self.request = None
|
||||
self._result = result or TransactionalRequestResult()
|
||||
self._is_retry = False
|
||||
|
||||
@property
|
||||
def transactional_id(self):
|
||||
return self.transaction_manager.transactional_id
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self.transaction_manager.producer_id_and_epoch.producer_id
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self.transaction_manager.producer_id_and_epoch.epoch
|
||||
|
||||
def fatal_error(self, exc):
|
||||
self.transaction_manager.transition_to_fatal_error(exc)
|
||||
self._result.done(error=exc)
|
||||
|
||||
def abortable_error(self, exc):
|
||||
self.transaction_manager.transition_to_abortable_error(exc)
|
||||
self._result.done(error=exc)
|
||||
|
||||
def fail(self, exc):
|
||||
self._result.done(error=exc)
|
||||
|
||||
def reenqueue(self):
|
||||
with self.transaction_manager._lock:
|
||||
self._is_retry = True
|
||||
self.transaction_manager._enqueue_request(self)
|
||||
|
||||
def on_complete(self, correlation_id, response_or_exc):
|
||||
if correlation_id != self.transaction_manager._in_flight_request_correlation_id:
|
||||
self.fatal_error(RuntimeError("Detected more than one in-flight transactional request."))
|
||||
else:
|
||||
self.transaction_manager.clear_in_flight_transactional_request_correlation_id()
|
||||
if isinstance(response_or_exc, Errors.KafkaConnectionError):
|
||||
log.debug("Disconnected from node. Will retry.")
|
||||
if self.needs_coordinator():
|
||||
self.transaction_manager._lookup_coordinator(self.coordinator_type, self.coordinator_key)
|
||||
self.reenqueue()
|
||||
elif isinstance(response_or_exc, Errors.UnsupportedVersionError):
|
||||
self.fatal_error(response_or_exc)
|
||||
elif not isinstance(response_or_exc, (Exception, type(None))):
|
||||
log.debug("Received transactional response %s for request %s", response_or_exc, self.request)
|
||||
with self.transaction_manager._lock:
|
||||
self.handle_response(response_or_exc)
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Could not execute transactional request for unknown reasons: %s" % response_or_exc))
|
||||
|
||||
def needs_coordinator(self):
|
||||
return self.coordinator_type is not None
|
||||
|
||||
@property
|
||||
def result(self):
|
||||
return self._result
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return 'transaction'
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return self.transaction_manager.transactional_id
|
||||
|
||||
def set_retry(self):
|
||||
self._is_retry = True
|
||||
|
||||
@property
|
||||
def is_retry(self):
|
||||
return self._is_retry
|
||||
|
||||
@abc.abstractmethod
|
||||
def handle_response(self, response):
|
||||
pass
|
||||
|
||||
@abc.abstractproperty
|
||||
def priority(self):
|
||||
pass
|
||||
|
||||
|
||||
class InitProducerIdHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, transaction_timeout_ms):
|
||||
super(InitProducerIdHandler, self).__init__(transaction_manager)
|
||||
|
||||
if transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = InitProducerIdRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
transaction_timeout_ms=transaction_timeout_ms)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.INIT_PRODUCER_ID
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
self.transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch))
|
||||
self.transaction_manager._transition_to(TransactionState.READY)
|
||||
self._result.done()
|
||||
elif error in (Errors.NotCoordinatorError, Errors.CoordinatorNotAvailableError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in InitProducerIdResponse: %s" % (error())))
|
||||
|
||||
class AddPartitionsToTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, topic_partitions):
|
||||
super(AddPartitionsToTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
if transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
topic_data = collections.defaultdict(list)
|
||||
for tp in topic_partitions:
|
||||
topic_data[tp.topic].append(tp.partition)
|
||||
self.request = AddPartitionsToTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
topics=list(topic_data.items()))
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
def handle_response(self, response):
|
||||
has_partition_errors = False
|
||||
unauthorized_topics = set()
|
||||
self.retry_backoff_ms = self.transaction_manager.retry_backoff_ms
|
||||
|
||||
results = {TopicPartition(topic, partition): Errors.for_code(error_code)
|
||||
for topic, partition_data in response.results
|
||||
for partition, error_code in partition_data}
|
||||
|
||||
for tp, error in six.iteritems(results):
|
||||
if error is Errors.NoError:
|
||||
continue
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error is Errors.ConcurrentTransactionsError:
|
||||
self.maybe_override_retry_backoff_ms()
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.UnknownTopicOrPartitionError):
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
return
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
return
|
||||
elif error in (Errors.InvalidProducerIdMappingError, Errors.InvalidTxnStateError):
|
||||
self.fatal_error(Errors.KafkaError(error()))
|
||||
return
|
||||
elif error is Errors.TopicAuthorizationFailedError:
|
||||
unauthorized_topics.add(tp.topic)
|
||||
elif error is Errors.OperationNotAttemptedError:
|
||||
log.debug("Did not attempt to add partition %s to transaction because other partitions in the"
|
||||
" batch had errors.", tp)
|
||||
has_partition_errors = True
|
||||
else:
|
||||
log.error("Could not add partition %s due to unexpected error %s", tp, error())
|
||||
has_partition_errors = True
|
||||
|
||||
partitions = set(results)
|
||||
|
||||
# Remove the partitions from the pending set regardless of the result. We use the presence
|
||||
# of partitions in the pending set to know when it is not safe to send batches. However, if
|
||||
# the partitions failed to be added and we enter an error state, we expect the batches to be
|
||||
# aborted anyway. In this case, we must be able to continue sending the batches which are in
|
||||
# retry for partitions that were successfully added.
|
||||
self.transaction_manager._pending_partitions_in_transaction -= partitions
|
||||
|
||||
if unauthorized_topics:
|
||||
self.abortable_error(Errors.TopicAuthorizationFailedError(unauthorized_topics))
|
||||
elif has_partition_errors:
|
||||
self.abortable_error(Errors.KafkaError("Could not add partitions to transaction due to errors: %s" % (results)))
|
||||
else:
|
||||
log.debug("Successfully added partitions %s to transaction", partitions)
|
||||
self.transaction_manager._partitions_in_transaction.update(partitions)
|
||||
self.transaction_manager._transaction_started = True
|
||||
self._result.done()
|
||||
|
||||
def maybe_override_retry_backoff_ms(self):
|
||||
# We only want to reduce the backoff when retrying the first AddPartition which errored out due to a
|
||||
# CONCURRENT_TRANSACTIONS error since this means that the previous transaction is still completing and
|
||||
# we don't want to wait too long before trying to start the new one.
|
||||
#
|
||||
# This is only a temporary fix, the long term solution is being tracked in
|
||||
# https://issues.apache.org/jira/browse/KAFKA-5482
|
||||
if not self.transaction_manager._partitions_in_transaction:
|
||||
self.retry_backoff_ms = min(self.transaction_manager.ADD_PARTITIONS_RETRY_BACKOFF_MS, self.retry_backoff_ms)
|
||||
|
||||
|
||||
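# Illustrative numbers for the override above (sketch, assuming the default
# retry_backoff_ms of 100): the first AddPartitions retry after a
# CONCURRENT_TRANSACTIONS error backs off for only
# min(ADD_PARTITIONS_RETRY_BACKOFF_MS, 100) = 20 ms rather than the full 100 ms.
def _example_add_partitions_retry_backoff():
    assert min(TransactionManager.ADD_PARTITIONS_RETRY_BACKOFF_MS, 100) == 20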
class FindCoordinatorHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, coord_type, coord_key):
|
||||
super(FindCoordinatorHandler, self).__init__(transaction_manager)
|
||||
|
||||
self._coord_type = coord_type
|
||||
self._coord_key = coord_key
|
||||
if transaction_manager._api_version >= (2, 0):
|
||||
version = 2
|
||||
else:
|
||||
version = 1
|
||||
if coord_type == 'group':
|
||||
coord_type_int8 = 0
|
||||
elif coord_type == 'transaction':
|
||||
coord_type_int8 = 1
|
||||
else:
|
||||
raise ValueError("Unrecognized coordinator type: %s" % (coord_type,))
|
||||
self.request = FindCoordinatorRequest[version](
|
||||
coordinator_key=coord_key,
|
||||
coordinator_type=coord_type_int8,
|
||||
)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.FIND_COORDINATOR
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return None
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return None
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
coordinator_id = self.transaction_manager._metadata.add_coordinator(
|
||||
response, self._coord_type, self._coord_key)
|
||||
if self._coord_type == 'group':
|
||||
self.transaction_manager._consumer_group_coordinator = coordinator_id
|
||||
elif self._coord_type == 'transaction':
|
||||
self.transaction_manager._transaction_coordinator = coordinator_id
|
||||
self._result.done()
|
||||
elif error is Errors.CoordinatorNotAvailableError:
|
||||
self.reenqueue()
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self._coord_key))
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError(
|
||||
"Could not find a coordinator with type %s with key %s due to"
|
||||
" unexpected error: %s" % (self._coord_type, self._coord_key, error())))
|
||||
|
||||
|
||||
class EndTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, committed):
|
||||
super(EndTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
if self.transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = EndTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
committed=committed)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.END_TXN
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
self.transaction_manager._complete_transaction()
|
||||
self._result.done()
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.InvalidTxnStateError:
|
||||
self.fatal_error(error())
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unhandled error in EndTxnResponse: %s" % (error())))
|
||||
|
||||
|
||||
class AddOffsetsToTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, consumer_group_id, offsets):
|
||||
super(AddOffsetsToTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
self.consumer_group_id = consumer_group_id
|
||||
self.offsets = offsets
|
||||
if self.transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = AddOffsetsToTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
group_id=consumer_group_id)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
log.debug("Successfully added partition for consumer group %s to transaction", self.consumer_group_id)
|
||||
|
||||
# note the result is not completed until the TxnOffsetCommit returns
|
||||
for tp, offset in six.iteritems(self.offsets):
|
||||
self.transaction_manager._pending_txn_offset_commits[tp] = offset
|
||||
handler = TxnOffsetCommitHandler(self.transaction_manager, self.consumer_group_id,
|
||||
self.transaction_manager._pending_txn_offset_commits, self._result)
|
||||
self.transaction_manager._enqueue_request(handler)
|
||||
self.transaction_manager._transaction_started = True
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self.consumer_group_id))
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in AddOffsetsToTxnResponse: %s" % (error())))
|
||||
|
||||
|
||||
class TxnOffsetCommitHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, consumer_group_id, offsets, result):
|
||||
super(TxnOffsetCommitHandler, self).__init__(transaction_manager, result=result)
|
||||
|
||||
self.consumer_group_id = consumer_group_id
|
||||
self.offsets = offsets
|
||||
self.request = self._build_request()
|
||||
|
||||
def _build_request(self):
|
||||
if self.transaction_manager._api_version >= (2, 1):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
|
||||
topic_data = collections.defaultdict(list)
|
||||
for tp, offset in six.iteritems(self.offsets):
|
||||
if version >= 2:
|
||||
partition_data = (tp.partition, offset.offset, offset.leader_epoch, offset.metadata)
|
||||
else:
|
||||
partition_data = (tp.partition, offset.offset, offset.metadata)
|
||||
topic_data[tp.topic].append(partition_data)
|
||||
|
||||
return TxnOffsetCommitRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
group_id=self.consumer_group_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
topics=list(topic_data.items()))
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return 'group'
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return self.consumer_group_id
|
||||
|
||||
def handle_response(self, response):
|
||||
lookup_coordinator = False
|
||||
retriable_failure = False
|
||||
|
||||
errors = {TopicPartition(topic, partition): Errors.for_code(error_code)
|
||||
for topic, partition_data in response.topics
|
||||
for partition, error_code in partition_data}
|
||||
|
||||
for tp, error in six.iteritems(errors):
|
||||
if error is Errors.NoError:
|
||||
log.debug("Successfully added offsets for %s from consumer group %s to transaction.",
|
||||
tp, self.consumer_group_id)
|
||||
del self.transaction_manager._pending_txn_offset_commits[tp]
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError, Errors.RequestTimedOutError):
|
||||
retriable_failure = True
|
||||
lookup_coordinator = True
|
||||
elif error is Errors.UnknownTopicOrPartitionError:
|
||||
retriable_failure = True
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self.consumer_group_id))
|
||||
return
|
||||
elif error in (Errors.TransactionalIdAuthorizationFailedError,
|
||||
Errors.InvalidProducerEpochError,
|
||||
Errors.UnsupportedForMessageFormatError):
|
||||
self.fatal_error(error())
|
||||
return
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in TxnOffsetCommitResponse: %s" % (error())))
|
||||
return
|
||||
|
||||
if lookup_coordinator:
|
||||
self.transaction_manager._lookup_coordinator('group', self.consumer_group_id)
|
||||
|
||||
if not retriable_failure:
|
||||
# all attempted partitions were either successful, or there was a fatal failure.
|
||||
# either way, we are not retrying, so complete the request.
|
||||
self.result.done()
|
||||
|
||||
# retry the commits which failed with a retriable error.
|
||||
elif self.transaction_manager._pending_txn_offset_commits:
|
||||
self.offsets = self.transaction_manager._pending_txn_offset_commits
|
||||
self.request = self._build_request()
|
||||
self.reenqueue()
|
||||