@@ -4,7 +4,7 @@ __title__ = 'kafka'
from kafka.version import __version__
__author__ = 'Dana Powers'
__license__ = 'Apache License 2.0'
__copyright__ = 'Copyright 2025 Dana Powers, David Arthur, and Contributors'
__copyright__ = 'Copyright 2016 Dana Powers, David Arthur, and Contributors'

# Set default logging handler to avoid "No handler found" warnings.
import logging

File diff suppressed because it is too large
@@ -1,142 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# Adapted from https://github.com/mrafayaleem/kafka-jython
|
||||
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
import argparse
|
||||
import pprint
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from kafka import KafkaConsumer
|
||||
|
||||
|
||||
class ConsumerPerformance(object):
|
||||
@staticmethod
|
||||
def run(args):
|
||||
try:
|
||||
props = {}
|
||||
for prop in args.consumer_config:
|
||||
k, v = prop.split('=')
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
pass
|
||||
if v == 'None':
|
||||
v = None
|
||||
elif v == 'False':
|
||||
v = False
|
||||
elif v == 'True':
|
||||
v = True
|
||||
props[k] = v
|
||||
|
||||
print('Initializing Consumer...')
|
||||
props['bootstrap_servers'] = args.bootstrap_servers
|
||||
props['auto_offset_reset'] = 'earliest'
|
||||
if 'group_id' not in props:
|
||||
props['group_id'] = 'kafka-consumer-benchmark'
|
||||
if 'consumer_timeout_ms' not in props:
|
||||
props['consumer_timeout_ms'] = 10000
|
||||
props['metrics_sample_window_ms'] = args.stats_interval * 1000
|
||||
for k, v in props.items():
|
||||
print('---> {0}={1}'.format(k, v))
|
||||
consumer = KafkaConsumer(args.topic, **props)
|
||||
print('---> group_id={0}'.format(consumer.config['group_id']))
|
||||
print('---> report stats every {0} secs'.format(args.stats_interval))
|
||||
print('---> raw metrics? {0}'.format(args.raw_metrics))
|
||||
timer_stop = threading.Event()
|
||||
timer = StatsReporter(args.stats_interval, consumer,
|
||||
event=timer_stop,
|
||||
raw_metrics=args.raw_metrics)
|
||||
timer.start()
|
||||
print('-> OK!')
|
||||
print()
|
||||
|
||||
start_time = time.time()
|
||||
records = 0
|
||||
for msg in consumer:
|
||||
records += 1
|
||||
if records >= args.num_records:
|
||||
break
|
||||
|
||||
end_time = time.time()
|
||||
timer_stop.set()
|
||||
timer.join()
|
||||
print('Consumed {0} records'.format(records))
|
||||
print('Execution time:', end_time - start_time, 'secs')
|
||||
|
||||
except Exception:
|
||||
exc_info = sys.exc_info()
|
||||
traceback.print_exception(*exc_info)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class StatsReporter(threading.Thread):
|
||||
def __init__(self, interval, consumer, event=None, raw_metrics=False):
|
||||
super(StatsReporter, self).__init__()
|
||||
self.interval = interval
|
||||
self.consumer = consumer
|
||||
self.event = event
|
||||
self.raw_metrics = raw_metrics
|
||||
|
||||
def print_stats(self):
|
||||
metrics = self.consumer.metrics()
|
||||
if self.raw_metrics:
|
||||
pprint.pprint(metrics)
|
||||
else:
|
||||
print('{records-consumed-rate} records/sec ({bytes-consumed-rate} B/sec),'
|
||||
' {fetch-latency-avg} latency,'
|
||||
' {fetch-rate} fetch/s,'
|
||||
' {fetch-size-avg} fetch size,'
|
||||
' {records-lag-max} max record lag,'
|
||||
' {records-per-request-avg} records/req'
|
||||
.format(**metrics['consumer-fetch-manager-metrics']))
|
||||
|
||||
|
||||
def print_final(self):
|
||||
self.print_stats()
|
||||
|
||||
def run(self):
|
||||
while self.event and not self.event.wait(self.interval):
|
||||
self.print_stats()
|
||||
else:
|
||||
self.print_final()
|
||||
|
||||
|
||||
def get_args_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='This tool is used to verify the consumer performance.')
|
||||
|
||||
parser.add_argument(
|
||||
'--bootstrap-servers', type=str, nargs='+', default=(),
|
||||
help='host:port for cluster bootstrap servers')
|
||||
parser.add_argument(
|
||||
'--topic', type=str,
|
||||
help='Topic for consumer test (default: kafka-python-benchmark-test)',
|
||||
default='kafka-python-benchmark-test')
|
||||
parser.add_argument(
|
||||
'--num-records', type=int,
|
||||
help='number of messages to consume (default: 1000000)',
|
||||
default=1000000)
|
||||
parser.add_argument(
|
||||
'--consumer-config', type=str, nargs='+', default=(),
|
||||
help='kafka consumer related configuration properties like '
|
||||
'bootstrap_servers,client_id etc..')
|
||||
parser.add_argument(
|
||||
'--fixture-compression', type=str,
|
||||
help='specify a compression type for use with broker fixtures / producer')
|
||||
parser.add_argument(
|
||||
'--stats-interval', type=int,
|
||||
help='Interval in seconds for stats reporting to console (default: 5)',
|
||||
default=5)
|
||||
parser.add_argument(
|
||||
'--raw-metrics', action='store_true',
|
||||
help='Enable this flag to print full metrics dict on each interval')
|
||||
return parser
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = get_args_parser().parse_args()
|
||||
ConsumerPerformance.run(args)
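
For reference, a minimal standalone sketch of the '--consumer-config k=v' coercion used by the benchmark scripts in this commit; the helper name parse_props is illustrative and not part of the original files:

def parse_props(pairs):
    """Coerce 'key=value' strings the same way the benchmark scripts above do."""
    props = {}
    for prop in pairs:
        k, v = prop.split('=')
        try:
            v = int(v)           # numeric strings become ints
        except ValueError:
            pass
        if v == 'None':
            v = None
        elif v == 'False':
            v = False
        elif v == 'True':
            v = True
        props[k] = v
    return props

# e.g. parse_props(['consumer_timeout_ms=10000', 'check_crcs=False'])
#      -> {'consumer_timeout_ms': 10000, 'check_crcs': False}
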
@@ -1,110 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
|
||||
from kafka import KafkaConsumer, KafkaProducer
|
||||
|
||||
|
||||
class Producer(threading.Thread):
|
||||
|
||||
def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
|
||||
super(Producer, self).__init__()
|
||||
self.bootstrap_servers = bootstrap_servers
|
||||
self.topic = topic
|
||||
self.stop_event = stop_event
|
||||
self.big_msg = b'1' * msg_size
|
||||
|
||||
def run(self):
|
||||
producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers)
|
||||
self.sent = 0
|
||||
|
||||
while not self.stop_event.is_set():
|
||||
producer.send(self.topic, self.big_msg)
|
||||
self.sent += 1
|
||||
producer.flush()
|
||||
producer.close()
|
||||
|
||||
|
||||
class Consumer(threading.Thread):
|
||||
def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
|
||||
super(Consumer, self).__init__()
|
||||
self.bootstrap_servers = bootstrap_servers
|
||||
self.topic = topic
|
||||
self.stop_event = stop_event
|
||||
self.msg_size = msg_size
|
||||
|
||||
def run(self):
|
||||
consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
|
||||
auto_offset_reset='earliest')
|
||||
consumer.subscribe([self.topic])
|
||||
self.valid = 0
|
||||
self.invalid = 0
|
||||
|
||||
for message in consumer:
|
||||
if len(message.value) == self.msg_size:
|
||||
self.valid += 1
|
||||
else:
|
||||
print('Invalid message:', len(message.value), self.msg_size)
|
||||
self.invalid += 1
|
||||
|
||||
if self.stop_event.is_set():
|
||||
break
|
||||
consumer.close()
|
||||
|
||||
|
||||
def get_args_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='This tool is used to demonstrate consumer and producer load.')
|
||||
|
||||
parser.add_argument(
|
||||
'--bootstrap-servers', type=str, nargs='+', default=('localhost:9092'),
|
||||
help='host:port for cluster bootstrap servers (default: localhost:9092)')
|
||||
parser.add_argument(
|
||||
'--topic', type=str,
|
||||
help='Topic for load test (default: kafka-python-benchmark-load-example)',
|
||||
default='kafka-python-benchmark-load-example')
|
||||
parser.add_argument(
|
||||
'--msg-size', type=int,
|
||||
help='Message size, in bytes, for load test (default: 524288)',
|
||||
default=524288)
|
||||
parser.add_argument(
|
||||
'--load-time', type=int,
|
||||
help='number of seconds to run load test (default: 10)',
|
||||
default=10)
|
||||
parser.add_argument(
|
||||
'--log-level', type=str,
|
||||
help='Optional logging level for load test: ERROR|INFO|DEBUG etc',
|
||||
default=None)
|
||||
return parser
|
||||
|
||||
|
||||
def main(args):
|
||||
if args.log_level:
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
|
||||
level=getattr(logging, args.log_level))
|
||||
producer_stop = threading.Event()
|
||||
consumer_stop = threading.Event()
|
||||
threads = [
|
||||
Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size),
|
||||
Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size)
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
|
||||
time.sleep(args.load_time)
|
||||
producer_stop.set()
|
||||
consumer_stop.set()
|
||||
print('Messages sent: %d' % threads[0].sent)
|
||||
print('Messages recvd: %d' % threads[1].valid)
|
||||
print('Messages invalid: %d' % threads[1].invalid)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = get_args_parser().parse_args()
|
||||
main(args)
|
||||
@@ -1,153 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# Adapted from https://github.com/mrafayaleem/kafka-jython
|
||||
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
import argparse
|
||||
import pprint
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from kafka.vendor.six.moves import range
|
||||
|
||||
from kafka import KafkaProducer
|
||||
|
||||
|
||||
class ProducerPerformance(object):
|
||||
@staticmethod
|
||||
def run(args):
|
||||
try:
|
||||
props = {}
|
||||
for prop in args.producer_config:
|
||||
k, v = prop.split('=')
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
pass
|
||||
if v == 'None':
|
||||
v = None
|
||||
elif v == 'False':
|
||||
v = False
|
||||
elif v == 'True':
|
||||
v = True
|
||||
props[k] = v
|
||||
|
||||
print('Initializing producer...')
|
||||
props['bootstrap_servers'] = args.bootstrap_servers
|
||||
record = bytes(bytearray(args.record_size))
|
||||
props['metrics_sample_window_ms'] = args.stats_interval * 1000
|
||||
|
||||
producer = KafkaProducer(**props)
|
||||
for k, v in props.items():
|
||||
print('---> {0}={1}'.format(k, v))
|
||||
print('---> send {0} byte records'.format(args.record_size))
|
||||
print('---> report stats every {0} secs'.format(args.stats_interval))
|
||||
print('---> raw metrics? {0}'.format(args.raw_metrics))
|
||||
timer_stop = threading.Event()
|
||||
timer = StatsReporter(args.stats_interval, producer,
|
||||
event=timer_stop,
|
||||
raw_metrics=args.raw_metrics)
|
||||
timer.start()
|
||||
print('-> OK!')
|
||||
print()
|
||||
|
||||
def _benchmark():
|
||||
results = []
|
||||
for i in range(args.num_records):
|
||||
results.append(producer.send(topic=args.topic, value=record))
|
||||
print("Send complete...")
|
||||
producer.flush()
|
||||
producer.close()
|
||||
count_success, count_failure = 0, 0
|
||||
for r in results:
|
||||
if r.succeeded():
|
||||
count_success += 1
|
||||
elif r.failed():
|
||||
count_failure += 1
|
||||
else:
|
||||
raise ValueError(r)
|
||||
print("%d suceeded, %d failed" % (count_success, count_failure))
|
||||
|
||||
start_time = time.time()
|
||||
_benchmark()
|
||||
end_time = time.time()
|
||||
timer_stop.set()
|
||||
timer.join()
|
||||
print('Execution time:', end_time - start_time, 'secs')
|
||||
|
||||
except Exception:
|
||||
exc_info = sys.exc_info()
|
||||
traceback.print_exception(*exc_info)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class StatsReporter(threading.Thread):
|
||||
def __init__(self, interval, producer, event=None, raw_metrics=False):
|
||||
super(StatsReporter, self).__init__()
|
||||
self.interval = interval
|
||||
self.producer = producer
|
||||
self.event = event
|
||||
self.raw_metrics = raw_metrics
|
||||
|
||||
def print_stats(self):
|
||||
metrics = self.producer.metrics()
|
||||
if not metrics:
|
||||
return
|
||||
if self.raw_metrics:
|
||||
pprint.pprint(metrics)
|
||||
else:
|
||||
print('{record-send-rate} records/sec ({byte-rate} B/sec),'
|
||||
' {request-latency-avg} latency,'
|
||||
' {record-size-avg} record size,'
|
||||
' {batch-size-avg} batch size,'
|
||||
' {records-per-request-avg} records/req'
|
||||
.format(**metrics['producer-metrics']))
|
||||
|
||||
def print_final(self):
|
||||
self.print_stats()
|
||||
|
||||
def run(self):
|
||||
while self.event and not self.event.wait(self.interval):
|
||||
self.print_stats()
|
||||
else:
|
||||
self.print_final()
|
||||
|
||||
|
||||
def get_args_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='This tool is used to verify the producer performance.')
|
||||
|
||||
parser.add_argument(
|
||||
'--bootstrap-servers', type=str, nargs='+', default=(),
|
||||
help='host:port for cluster bootstrap server')
|
||||
parser.add_argument(
|
||||
'--topic', type=str,
|
||||
help='Topic name for test (default: kafka-python-benchmark-test)',
|
||||
default='kafka-python-benchmark-test')
|
||||
parser.add_argument(
|
||||
'--num-records', type=int,
|
||||
help='number of messages to produce (default: 1000000)',
|
||||
default=1000000)
|
||||
parser.add_argument(
|
||||
'--record-size', type=int,
|
||||
help='message size in bytes (default: 100)',
|
||||
default=100)
|
||||
parser.add_argument(
|
||||
'--producer-config', type=str, nargs='+', default=(),
|
||||
help='kafka producer related configuration properties like '
'bootstrap_servers,client_id etc..')
|
||||
parser.add_argument(
|
||||
'--stats-interval', type=int,
|
||||
help='Interval in seconds for stats reporting to console (default: 5)',
|
||||
default=5)
|
||||
parser.add_argument(
|
||||
'--raw-metrics', action='store_true',
|
||||
help='Enable this flag to print full metrics dict on each interval')
|
||||
return parser
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = get_args_parser().parse_args()
|
||||
ProducerPerformance.run(args)
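
The benchmark above only inspects the send() futures after flush(); outside a throughput test, a single future can be resolved directly with get(). A minimal sketch (topic name and broker address are placeholders):

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')   # placeholder address
future = producer.send('kafka-python-benchmark-test', b'payload')
metadata = future.get(timeout=10)   # blocks until the broker responds, raises on error
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()
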
@@ -1,78 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
import hashlib
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
|
||||
import pyperf
|
||||
|
||||
from kafka.record.memory_records import MemoryRecordsBuilder
|
||||
|
||||
|
||||
DEFAULT_BATCH_SIZE = 1600 * 1024
|
||||
KEY_SIZE = 6
|
||||
VALUE_SIZE = 60
|
||||
TIMESTAMP_RANGE = [1505824130000, 1505824140000]
|
||||
|
||||
# With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages
|
||||
MESSAGES_PER_BATCH = 100
|
||||
|
||||
|
||||
def random_bytes(length):
|
||||
buffer = bytearray(length)
|
||||
for i in range(length):
|
||||
buffer[i] = random.randint(0, 255)
|
||||
return bytes(buffer)
|
||||
|
||||
|
||||
def prepare():
|
||||
return iter(itertools.cycle([
|
||||
(random_bytes(KEY_SIZE),
|
||||
random_bytes(VALUE_SIZE),
|
||||
random.randint(*TIMESTAMP_RANGE)
|
||||
)
|
||||
for _ in range(int(MESSAGES_PER_BATCH * 1.94))
|
||||
]))
|
||||
|
||||
|
||||
def finalize(results):
|
||||
# Just some strange code to make sure PyPy does execute the main code
|
||||
# properly, without optimizing it away
|
||||
hash_val = hashlib.md5()
|
||||
for buf in results:
|
||||
hash_val.update(buf)
|
||||
print(hash_val, file=open(os.devnull, "w"))
|
||||
|
||||
|
||||
def func(loops, magic):
|
||||
# Jit can optimize out the whole function if the result is the same each
|
||||
# time, so we need some randomized input data )
|
||||
precomputed_samples = prepare()
|
||||
results = []
|
||||
|
||||
# Main benchmark code.
|
||||
t0 = pyperf.perf_counter()
|
||||
for _ in range(loops):
|
||||
batch = MemoryRecordsBuilder(
|
||||
magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
|
||||
for _ in range(MESSAGES_PER_BATCH):
|
||||
key, value, timestamp = next(precomputed_samples)
|
||||
size = batch.append(
|
||||
timestamp=timestamp, key=key, value=value)
|
||||
assert size
|
||||
batch.close()
|
||||
results.append(batch.buffer())
|
||||
|
||||
res = pyperf.perf_counter() - t0
|
||||
|
||||
finalize(results)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
runner = pyperf.Runner()
|
||||
runner.bench_time_func('batch_append_v0', func, 0)
|
||||
runner.bench_time_func('batch_append_v1', func, 1)
|
||||
runner.bench_time_func('batch_append_v2', func, 2)
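
For orientation, Runner.bench_time_func as used above passes the loop count into the benchmark function and expects the elapsed time in seconds back; a minimal self-contained sketch of that contract (the workload is a stand-in, unrelated to the kafka code):

import pyperf

def timed(loops):
    # pyperf supplies `loops`; the function times them itself and returns seconds
    t0 = pyperf.perf_counter()
    for _ in range(loops):
        sum(range(100))   # stand-in workload
    return pyperf.perf_counter() - t0

if __name__ == '__main__':
    pyperf.Runner().bench_time_func('stand_in_workload', timed)
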
@@ -1,83 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import hashlib
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
|
||||
import pyperf
|
||||
|
||||
from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder
|
||||
|
||||
|
||||
DEFAULT_BATCH_SIZE = 1600 * 1024
|
||||
KEY_SIZE = 6
|
||||
VALUE_SIZE = 60
|
||||
TIMESTAMP_RANGE = [1505824130000, 1505824140000]
|
||||
|
||||
BATCH_SAMPLES = 5
|
||||
MESSAGES_PER_BATCH = 100
|
||||
|
||||
|
||||
def random_bytes(length):
|
||||
buffer = bytearray(length)
|
||||
for i in range(length):
|
||||
buffer[i] = random.randint(0, 255)
|
||||
return bytes(buffer)
|
||||
|
||||
|
||||
def prepare(magic):
|
||||
samples = []
|
||||
for _ in range(BATCH_SAMPLES):
|
||||
batch = MemoryRecordsBuilder(
|
||||
magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
|
||||
for _ in range(MESSAGES_PER_BATCH):
|
||||
size = batch.append(
|
||||
random.randint(*TIMESTAMP_RANGE),
|
||||
random_bytes(KEY_SIZE),
|
||||
random_bytes(VALUE_SIZE),
|
||||
headers=[])
|
||||
assert size
|
||||
batch.close()
|
||||
samples.append(bytes(batch.buffer()))
|
||||
|
||||
return iter(itertools.cycle(samples))
|
||||
|
||||
|
||||
def finalize(results):
|
||||
# Just some strange code to make sure PyPy does execute the code above
|
||||
# properly
|
||||
hash_val = hashlib.md5()
|
||||
for buf in results:
|
||||
hash_val.update(buf)
|
||||
print(hash_val, file=open(os.devnull, "w"))
|
||||
|
||||
|
||||
def func(loops, magic):
|
||||
# Jit can optimize out the whole function if the result is the same each
|
||||
# time, so we need some randomized input data )
|
||||
precomputed_samples = prepare(magic)
|
||||
results = []
|
||||
|
||||
# Main benchmark code.
|
||||
batch_data = next(precomputed_samples)
|
||||
t0 = pyperf.perf_counter()
|
||||
for _ in range(loops):
|
||||
records = MemoryRecords(batch_data)
|
||||
while records.has_next():
|
||||
batch = records.next_batch()
|
||||
batch.validate_crc()
|
||||
for record in batch:
|
||||
results.append(record.value)
|
||||
|
||||
res = pyperf.perf_counter() - t0
|
||||
finalize(results)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
runner = pyperf.Runner()
|
||||
runner.bench_time_func('batch_read_v0', func, 0)
|
||||
runner.bench_time_func('batch_read_v1', func, 1)
|
||||
runner.bench_time_func('batch_read_v2', func, 2)
|
||||
@@ -1,434 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import pyperf
|
||||
from kafka.vendor import six
|
||||
|
||||
|
||||
test_data = [
|
||||
(b"\x00", 0),
|
||||
(b"\x01", -1),
|
||||
(b"\x02", 1),
|
||||
(b"\x7E", 63),
|
||||
(b"\x7F", -64),
|
||||
(b"\x80\x01", 64),
|
||||
(b"\x81\x01", -65),
|
||||
(b"\xFE\x7F", 8191),
|
||||
(b"\xFF\x7F", -8192),
|
||||
(b"\x80\x80\x01", 8192),
|
||||
(b"\x81\x80\x01", -8193),
|
||||
(b"\xFE\xFF\x7F", 1048575),
|
||||
(b"\xFF\xFF\x7F", -1048576),
|
||||
(b"\x80\x80\x80\x01", 1048576),
|
||||
(b"\x81\x80\x80\x01", -1048577),
|
||||
(b"\xFE\xFF\xFF\x7F", 134217727),
|
||||
(b"\xFF\xFF\xFF\x7F", -134217728),
|
||||
(b"\x80\x80\x80\x80\x01", 134217728),
|
||||
(b"\x81\x80\x80\x80\x01", -134217729),
|
||||
(b"\xFE\xFF\xFF\xFF\x7F", 17179869183),
|
||||
(b"\xFF\xFF\xFF\xFF\x7F", -17179869184),
|
||||
(b"\x80\x80\x80\x80\x80\x01", 17179869184),
|
||||
(b"\x81\x80\x80\x80\x80\x01", -17179869185),
|
||||
(b"\xFE\xFF\xFF\xFF\xFF\x7F", 2199023255551),
|
||||
(b"\xFF\xFF\xFF\xFF\xFF\x7F", -2199023255552),
|
||||
(b"\x80\x80\x80\x80\x80\x80\x01", 2199023255552),
|
||||
(b"\x81\x80\x80\x80\x80\x80\x01", -2199023255553),
|
||||
(b"\xFE\xFF\xFF\xFF\xFF\xFF\x7F", 281474976710655),
|
||||
(b"\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -281474976710656),
|
||||
(b"\x80\x80\x80\x80\x80\x80\x80\x01", 281474976710656),
|
||||
(b"\x81\x80\x80\x80\x80\x80\x80\x01", -281474976710657),
|
||||
(b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 36028797018963967),
|
||||
(b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -36028797018963968),
|
||||
(b"\x80\x80\x80\x80\x80\x80\x80\x80\x01", 36028797018963968),
|
||||
(b"\x81\x80\x80\x80\x80\x80\x80\x80\x01", -36028797018963969),
|
||||
(b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 4611686018427387903),
|
||||
(b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -4611686018427387904),
|
||||
(b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 4611686018427387904),
|
||||
(b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x01", -4611686018427387905),
|
||||
]
|
||||
|
||||
|
||||
BENCH_VALUES_ENC = [
|
||||
60, # 1 byte
|
||||
-8192, # 2 bytes
|
||||
1048575, # 3 bytes
|
||||
134217727, # 4 bytes
|
||||
-17179869184, # 5 bytes
|
||||
2199023255551, # 6 bytes
|
||||
]
|
||||
|
||||
BENCH_VALUES_DEC = [
|
||||
b"\x7E", # 1 byte
|
||||
b"\xFF\x7F", # 2 bytes
|
||||
b"\xFE\xFF\x7F", # 3 bytes
|
||||
b"\xFF\xFF\xFF\x7F", # 4 bytes
|
||||
b"\x80\x80\x80\x80\x01", # 5 bytes
|
||||
b"\xFE\xFF\xFF\xFF\xFF\x7F", # 6 bytes
|
||||
]
|
||||
BENCH_VALUES_DEC = list(map(bytearray, BENCH_VALUES_DEC))
|
||||
|
||||
|
||||
def _assert_valid_enc(enc_func):
|
||||
for encoded, decoded in test_data:
|
||||
assert enc_func(decoded) == encoded, decoded
|
||||
|
||||
|
||||
def _assert_valid_dec(dec_func):
|
||||
for encoded, decoded in test_data:
|
||||
res, pos = dec_func(bytearray(encoded))
|
||||
assert res == decoded, (decoded, res)
|
||||
assert pos == len(encoded), (decoded, pos)
|
||||
|
||||
|
||||
def _assert_valid_size(size_func):
|
||||
for encoded, decoded in test_data:
|
||||
assert size_func(decoded) == len(encoded), decoded
|
||||
|
||||
|
||||
def encode_varint_1(num):
|
||||
""" Encode an integer to a varint presentation. See
|
||||
https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
|
||||
on how those can be produced.
|
||||
|
||||
Arguments:
|
||||
num (int): Value to encode
|
||||
|
||||
Returns:
|
||||
bytearray: Encoded presentation of integer with length from 1 to 10
|
||||
bytes
|
||||
"""
|
||||
# Shift sign to the end of number
|
||||
num = (num << 1) ^ (num >> 63)
|
||||
# Max 10 bytes. We assert those are allocated
|
||||
buf = bytearray(10)
|
||||
|
||||
for i in range(10):
|
||||
# 7 lowest bits from the number and set 8th if we still have pending
|
||||
# bits left to encode
|
||||
buf[i] = num & 0x7f | (0x80 if num > 0x7f else 0)
|
||||
num = num >> 7
|
||||
if num == 0:
|
||||
break
|
||||
else:
|
||||
# Max size of encoded double is 10 bytes for unsigned values
raise ValueError("Out of double range")
|
||||
return buf[:i + 1]
|
||||
|
||||
|
||||
def encode_varint_2(value, int2byte=six.int2byte):
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
res = b""
|
||||
while value:
|
||||
res += int2byte(0x80 | bits)
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
return res + int2byte(bits)
|
||||
|
||||
|
||||
def encode_varint_3(value, buf):
|
||||
append = buf.append
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
while value:
|
||||
append(0x80 | bits)
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
append(bits)
|
||||
return value
|
||||
|
||||
|
||||
def encode_varint_4(value, int2byte=six.int2byte):
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
|
||||
if value <= 0x7f: # 1 byte
|
||||
return int2byte(value)
|
||||
if value <= 0x3fff: # 2 bytes
|
||||
return int2byte(0x80 | (value & 0x7f)) + int2byte(value >> 7)
|
||||
if value <= 0x1fffff: # 3 bytes
|
||||
return int2byte(0x80 | (value & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 7) & 0x7f)) + \
|
||||
int2byte(value >> 14)
|
||||
if value <= 0xfffffff: # 4 bytes
|
||||
return int2byte(0x80 | (value & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 7) & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 14) & 0x7f)) + \
|
||||
int2byte(value >> 21)
|
||||
if value <= 0x7ffffffff: # 5 bytes
|
||||
return int2byte(0x80 | (value & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 7) & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 14) & 0x7f)) + \
|
||||
int2byte(0x80 | ((value >> 21) & 0x7f)) + \
|
||||
int2byte(value >> 28)
|
||||
else:
|
||||
# Return to general algorithm
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
res = b""
|
||||
while value:
|
||||
res += int2byte(0x80 | bits)
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
return res + int2byte(bits)
|
||||
|
||||
|
||||
def encode_varint_5(value, buf, pos=0):
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
while value:
|
||||
buf[pos] = 0x80 | bits
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
pos += 1
|
||||
buf[pos] = bits
|
||||
return pos + 1
|
||||
|
||||
def encode_varint_6(value, buf):
|
||||
append = buf.append
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
|
||||
if value <= 0x7f: # 1 byte
|
||||
append(value)
|
||||
return 1
|
||||
if value <= 0x3fff: # 2 bytes
|
||||
append(0x80 | (value & 0x7f))
|
||||
append(value >> 7)
|
||||
return 2
|
||||
if value <= 0x1fffff: # 3 bytes
|
||||
append(0x80 | (value & 0x7f))
|
||||
append(0x80 | ((value >> 7) & 0x7f))
|
||||
append(value >> 14)
|
||||
return 3
|
||||
if value <= 0xfffffff: # 4 bytes
|
||||
append(0x80 | (value & 0x7f))
|
||||
append(0x80 | ((value >> 7) & 0x7f))
|
||||
append(0x80 | ((value >> 14) & 0x7f))
|
||||
append(value >> 21)
|
||||
return 4
|
||||
if value <= 0x7ffffffff: # 5 bytes
|
||||
append(0x80 | (value & 0x7f))
|
||||
append(0x80 | ((value >> 7) & 0x7f))
|
||||
append(0x80 | ((value >> 14) & 0x7f))
|
||||
append(0x80 | ((value >> 21) & 0x7f))
|
||||
append(value >> 28)
|
||||
return 5
|
||||
else:
|
||||
# Return to general algorithm
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
i = 0
|
||||
while value:
|
||||
append(0x80 | bits)
|
||||
bits = value & 0x7f
|
||||
value >>= 7
|
||||
i += 1
|
||||
append(bits)
|
||||
return i
|
||||
|
||||
|
||||
def size_of_varint_1(value):
|
||||
""" Number of bytes needed to encode an integer in variable-length format.
|
||||
"""
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
res = 0
|
||||
while True:
|
||||
res += 1
|
||||
value = value >> 7
|
||||
if value == 0:
|
||||
break
|
||||
return res
|
||||
|
||||
|
||||
def size_of_varint_2(value):
|
||||
""" Number of bytes needed to encode an integer in variable-length format.
|
||||
"""
|
||||
value = (value << 1) ^ (value >> 63)
|
||||
if value <= 0x7f:
|
||||
return 1
|
||||
if value <= 0x3fff:
|
||||
return 2
|
||||
if value <= 0x1fffff:
|
||||
return 3
|
||||
if value <= 0xfffffff:
|
||||
return 4
|
||||
if value <= 0x7ffffffff:
|
||||
return 5
|
||||
if value <= 0x3ffffffffff:
|
||||
return 6
|
||||
if value <= 0x1ffffffffffff:
|
||||
return 7
|
||||
if value <= 0xffffffffffffff:
|
||||
return 8
|
||||
if value <= 0x7fffffffffffffff:
|
||||
return 9
|
||||
return 10
|
||||
|
||||
|
||||
if six.PY3:
|
||||
def _read_byte(memview, pos):
|
||||
""" Read a byte from memoryview as an integer
|
||||
|
||||
Raises:
|
||||
IndexError: if position is out of bounds
|
||||
"""
|
||||
return memview[pos]
|
||||
else:
|
||||
def _read_byte(memview, pos):
|
||||
""" Read a byte from memoryview as an integer
|
||||
|
||||
Raises:
|
||||
IndexError: if position is out of bounds
|
||||
"""
|
||||
return ord(memview[pos])
|
||||
|
||||
|
||||
def decode_varint_1(buffer, pos=0):
|
||||
""" Decode an integer from a varint presentation. See
|
||||
https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
|
||||
on how those can be produced.
|
||||
|
||||
Arguments:
|
||||
buffer (bytes-like): any object acceptable by ``memoryview``
|
||||
pos (int): optional position to read from
|
||||
|
||||
Returns:
|
||||
(int, int): Decoded int value and next read position
|
||||
"""
|
||||
value = 0
|
||||
shift = 0
|
||||
memview = memoryview(buffer)
|
||||
for i in range(pos, pos + 10):
|
||||
try:
|
||||
byte = _read_byte(memview, i)
|
||||
except IndexError:
|
||||
raise ValueError("End of byte stream")
|
||||
if byte & 0x80 != 0:
|
||||
value |= (byte & 0x7f) << shift
|
||||
shift += 7
|
||||
else:
|
||||
value |= byte << shift
|
||||
break
|
||||
else:
|
||||
# Max size of encoded double is 10 bytes for unsigned values
raise ValueError("Out of double range")
|
||||
# Normalize sign
|
||||
return (value >> 1) ^ -(value & 1), i + 1
|
||||
|
||||
|
||||
def decode_varint_2(buffer, pos=0):
|
||||
result = 0
|
||||
shift = 0
|
||||
while 1:
|
||||
b = buffer[pos]
|
||||
result |= ((b & 0x7f) << shift)
|
||||
pos += 1
|
||||
if not (b & 0x80):
|
||||
# result = result_type(() & mask)
|
||||
return ((result >> 1) ^ -(result & 1), pos)
|
||||
shift += 7
|
||||
if shift >= 64:
|
||||
raise ValueError("Out of int64 range")
|
||||
|
||||
|
||||
def decode_varint_3(buffer, pos=0):
|
||||
result = buffer[pos]
|
||||
if not (result & 0x81):
|
||||
return (result >> 1), pos + 1
|
||||
if not (result & 0x80):
|
||||
return (result >> 1) ^ (~0), pos + 1
|
||||
|
||||
result &= 0x7f
|
||||
pos += 1
|
||||
shift = 7
|
||||
while 1:
|
||||
b = buffer[pos]
|
||||
result |= ((b & 0x7f) << shift)
|
||||
pos += 1
|
||||
if not (b & 0x80):
|
||||
return ((result >> 1) ^ -(result & 1), pos)
|
||||
shift += 7
|
||||
if shift >= 64:
|
||||
raise ValueError("Out of int64 range")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
_assert_valid_enc(encode_varint_1)
|
||||
_assert_valid_enc(encode_varint_2)
|
||||
|
||||
for encoded, decoded in test_data:
|
||||
res = bytearray()
|
||||
encode_varint_3(decoded, res)
|
||||
assert res == encoded
|
||||
|
||||
_assert_valid_enc(encode_varint_4)
|
||||
|
||||
# import dis
|
||||
# dis.dis(encode_varint_4)
|
||||
|
||||
for encoded, decoded in test_data:
|
||||
res = bytearray(10)
|
||||
written = encode_varint_5(decoded, res)
|
||||
assert res[:written] == encoded
|
||||
|
||||
for encoded, decoded in test_data:
|
||||
res = bytearray()
|
||||
encode_varint_6(decoded, res)
|
||||
assert res == encoded
|
||||
|
||||
_assert_valid_size(size_of_varint_1)
|
||||
_assert_valid_size(size_of_varint_2)
|
||||
_assert_valid_dec(decode_varint_1)
|
||||
_assert_valid_dec(decode_varint_2)
|
||||
_assert_valid_dec(decode_varint_3)
|
||||
|
||||
# import dis
|
||||
# dis.dis(decode_varint_3)
|
||||
|
||||
runner = pyperf.Runner()
|
||||
# Encode algorithms returning a bytes result
|
||||
for bench_func in [
|
||||
encode_varint_1,
|
||||
encode_varint_2,
|
||||
encode_varint_4]:
|
||||
for i, value in enumerate(BENCH_VALUES_ENC):
|
||||
runner.bench_func(
|
||||
'{}_{}byte'.format(bench_func.__name__, i + 1),
|
||||
bench_func, value)
|
||||
|
||||
# Encode algorithms writing to the buffer
|
||||
for bench_func in [
|
||||
encode_varint_3,
|
||||
encode_varint_5,
|
||||
encode_varint_6]:
|
||||
for i, value in enumerate(BENCH_VALUES_ENC):
|
||||
fname = bench_func.__name__
|
||||
runner.timeit(
|
||||
'{}_{}byte'.format(fname, i + 1),
|
||||
stmt="{}({}, buffer)".format(fname, value),
|
||||
setup="from __main__ import {}; buffer = bytearray(10)".format(
|
||||
fname)
|
||||
)
|
||||
|
||||
# Size algorithms
|
||||
for bench_func in [
|
||||
size_of_varint_1,
|
||||
size_of_varint_2]:
|
||||
for i, value in enumerate(BENCH_VALUES_ENC):
|
||||
runner.bench_func(
|
||||
'{}_{}byte'.format(bench_func.__name__, i + 1),
|
||||
bench_func, value)
|
||||
|
||||
# Decode algorithms
|
||||
for bench_func in [
|
||||
decode_varint_1,
|
||||
decode_varint_2,
|
||||
decode_varint_3]:
|
||||
for i, value in enumerate(BENCH_VALUES_DEC):
|
||||
runner.bench_func(
|
||||
'{}_{}byte'.format(bench_func.__name__, i + 1),
|
||||
bench_func, value)
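
All of the varint helpers above begin with the same zigzag step, which maps signed integers to unsigned ones so that values of small magnitude encode in few bytes. A minimal illustration, checked against the test_data table (not part of the original file):

def zigzag64(n):
    # identical to the transform at the top of each encoder above
    return (n << 1) ^ (n >> 63)

assert zigzag64(0) == 0
assert zigzag64(-1) == 1
assert zigzag64(1) == 2
assert zigzag64(63) == 126    # one byte, matches (b"\x7E", 63)
assert zigzag64(-64) == 127   # one byte, matches (b"\x7F", -64)
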
@@ -19,18 +19,17 @@ except ImportError:
|
||||
from kafka.vendor import six
|
||||
|
||||
from kafka.cluster import ClusterMetadata
|
||||
from kafka.conn import BrokerConnection, ConnectionStates, get_ip_port_afi
|
||||
from kafka.conn import BrokerConnection, ConnectionStates, collect_hosts, get_ip_port_afi
|
||||
from kafka import errors as Errors
|
||||
from kafka.future import Future
|
||||
from kafka.metrics import AnonMeasurable
|
||||
from kafka.metrics.stats import Avg, Count, Rate
|
||||
from kafka.metrics.stats.rate import TimeUnit
|
||||
from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS
|
||||
from kafka.protocol.metadata import MetadataRequest
|
||||
from kafka.util import Dict, Timer, WeakMethod, ensure_valid_topic_name
|
||||
from kafka.util import Dict, WeakMethod
|
||||
# Although this looks unused, it actually monkey-patches socket.socketpair()
|
||||
# and should be left in as long as we're using socket.socketpair() in this file
|
||||
from kafka.vendor import socketpair # noqa: F401
|
||||
from kafka.vendor import socketpair
|
||||
from kafka.version import __version__
|
||||
|
||||
if six.PY2:
|
||||
@@ -76,7 +75,7 @@ class KafkaClient(object):
|
||||
reconnection attempts will continue periodically with this fixed
|
||||
rate. To avoid connection storms, a randomization factor of 0.2
|
||||
will be applied to the backoff resulting in a random range between
|
||||
20% below and 20% above the computed value. Default: 30000.
|
||||
20% below and 20% above the computed value. Default: 1000.
|
||||
request_timeout_ms (int): Client request timeout in milliseconds.
|
||||
Default: 30000.
|
||||
connections_max_idle_ms: Close idle connections after the number of
|
||||
@@ -102,9 +101,6 @@ class KafkaClient(object):
|
||||
which we force a refresh of metadata even if we haven't seen any
|
||||
partition leadership changes to proactively discover any new
|
||||
brokers or partitions. Default: 300000
|
||||
allow_auto_create_topics (bool): Enable/disable auto topic creation
|
||||
on metadata request. Only available with api_version >= (0, 11).
|
||||
Default: True
|
||||
security_protocol (str): Protocol used to communicate with brokers.
|
||||
Valid values are: PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL.
|
||||
Default: PLAINTEXT.
|
||||
@@ -133,24 +129,12 @@ class KafkaClient(object):
|
||||
format. If no cipher can be selected (because compile-time options
|
||||
or other configuration forbids use of all the specified ciphers),
|
||||
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
|
||||
api_version (tuple): Specify which Kafka API version to use. If set to
|
||||
None, the client will attempt to determine the broker version via
|
||||
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
|
||||
various known APIs. Dynamic version checking is performed eagerly
|
||||
during __init__ and can raise NoBrokersAvailableError if no connection
|
||||
was made before timeout (see api_version_auto_timeout_ms below).
|
||||
Different versions enable different functionality.
|
||||
|
||||
Examples:
|
||||
(3, 9) most recent broker release, enable all supported features
|
||||
(0, 10, 0) enables sasl authentication
|
||||
(0, 8, 0) enables basic functionality only
|
||||
|
||||
Default: None
|
||||
api_version (tuple): Specify which Kafka API version to use. If set
|
||||
to None, KafkaClient will attempt to infer the broker version by
|
||||
probing various APIs. Example: (0, 10, 2). Default: None
|
||||
api_version_auto_timeout_ms (int): number of milliseconds to throw a
|
||||
timeout exception from the constructor when checking the broker
|
||||
api version. Only applies if api_version set to None.
|
||||
Default: 2000
|
||||
api version. Only applies if api_version is None
|
||||
selector (selectors.BaseSelector): Provide a specific selector
|
||||
implementation to use for I/O multiplexing.
|
||||
Default: selectors.DefaultSelector
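
As a concrete illustration of the api_version option documented above (the same option is surfaced by the higher-level KafkaConsumer and KafkaProducer constructors), pinning a version skips the startup probe while None keeps dynamic detection; the topic and broker address below are placeholders:

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',                          # placeholder topic
    bootstrap_servers='localhost:9092',  # placeholder address
    api_version=(0, 10, 2),              # skip version probing; None would auto-detect
)
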
@@ -164,16 +148,12 @@ class KafkaClient(object):
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
|
||||
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
|
||||
sasl_kerberos_domain name are ignored. Default: None.
|
||||
sasl_kerberos_service_name (str): Service name to include in GSSAPI
|
||||
sasl mechanism handshake. Default: 'kafka'
|
||||
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
|
||||
sasl mechanism handshake. Default: one of bootstrap servers
|
||||
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
|
||||
token provider instance. Default: None
|
||||
socks5_proxy (str): Socks5 proxy URL. Default: None
|
||||
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
|
||||
instance. (See kafka.oauth.abstract). Default: None
|
||||
"""
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
@@ -184,7 +164,7 @@ class KafkaClient(object):
|
||||
'wakeup_timeout_ms': 3000,
|
||||
'connections_max_idle_ms': 9 * 60 * 1000,
|
||||
'reconnect_backoff_ms': 50,
|
||||
'reconnect_backoff_max_ms': 30000,
|
||||
'reconnect_backoff_max_ms': 1000,
|
||||
'max_in_flight_requests_per_connection': 5,
|
||||
'receive_buffer_bytes': None,
|
||||
'send_buffer_bytes': None,
|
||||
@@ -192,7 +172,6 @@ class KafkaClient(object):
|
||||
'sock_chunk_bytes': 4096, # undocumented experimental option
|
||||
'sock_chunk_buffer_count': 1000, # undocumented experimental option
|
||||
'retry_backoff_ms': 100,
|
||||
'allow_auto_create_topics': True,
|
||||
'metadata_max_age_ms': 300000,
|
||||
'security_protocol': 'PLAINTEXT',
|
||||
'ssl_context': None,
|
||||
@@ -211,11 +190,9 @@ class KafkaClient(object):
|
||||
'sasl_mechanism': None,
|
||||
'sasl_plain_username': None,
|
||||
'sasl_plain_password': None,
|
||||
'sasl_kerberos_name': None,
|
||||
'sasl_kerberos_service_name': 'kafka',
|
||||
'sasl_kerberos_domain_name': None,
|
||||
'sasl_oauth_token_provider': None,
|
||||
'socks5_proxy': None,
|
||||
'sasl_oauth_token_provider': None
|
||||
}
|
||||
|
||||
def __init__(self, **configs):
|
||||
@@ -227,9 +204,8 @@ class KafkaClient(object):
|
||||
# these properties need to be set on top of the initialization pipeline
|
||||
# because they are used when __del__ method is called
|
||||
self._closed = False
|
||||
self._wake_r, self._wake_w = socket.socketpair()
|
||||
self._selector = self.config['selector']()
|
||||
self._init_wakeup_socketpair()
|
||||
self._wake_lock = threading.Lock()
|
||||
|
||||
self.cluster = ClusterMetadata(**self.config)
|
||||
self._topics = set() # empty set will fetch all topic metadata
|
||||
@@ -238,10 +214,12 @@ class KafkaClient(object):
|
||||
self._api_versions = None
|
||||
self._connecting = set()
|
||||
self._sending = set()
|
||||
|
||||
# Not currently used, but data is collected internally
|
||||
self._refresh_on_disconnects = True
|
||||
self._last_bootstrap = 0
|
||||
self._bootstrap_fails = 0
|
||||
self._wake_r.setblocking(False)
|
||||
self._wake_w.settimeout(self.config['wakeup_timeout_ms'] / 1000.0)
|
||||
self._wake_lock = threading.Lock()
|
||||
|
||||
self._lock = threading.RLock()
|
||||
|
||||
@@ -250,6 +228,7 @@ class KafkaClient(object):
|
||||
# lock above.
|
||||
self._pending_completion = collections.deque()
|
||||
|
||||
self._selector.register(self._wake_r, selectors.EVENT_READ)
|
||||
self._idle_expiry_manager = IdleConnectionManager(self.config['connections_max_idle_ms'])
|
||||
self._sensors = None
|
||||
if self.config['metrics']:
|
||||
@@ -257,48 +236,26 @@ class KafkaClient(object):
|
||||
self.config['metric_group_prefix'],
|
||||
weakref.proxy(self._conns))
|
||||
|
||||
self._num_bootstrap_hosts = len(collect_hosts(self.config['bootstrap_servers']))
|
||||
|
||||
# Check Broker Version if not set explicitly
|
||||
if self.config['api_version'] is None:
|
||||
self.config['api_version'] = self.check_version()
|
||||
elif self.config['api_version'] in BROKER_API_VERSIONS:
|
||||
self._api_versions = BROKER_API_VERSIONS[self.config['api_version']]
|
||||
elif (self.config['api_version'] + (0,)) in BROKER_API_VERSIONS:
|
||||
log.warning('Configured api_version %s is ambiguous; using %s',
|
||||
self.config['api_version'], self.config['api_version'] + (0,))
|
||||
self.config['api_version'] = self.config['api_version'] + (0,)
|
||||
self._api_versions = BROKER_API_VERSIONS[self.config['api_version']]
|
||||
else:
|
||||
compatible_version = None
|
||||
for v in sorted(BROKER_API_VERSIONS.keys(), reverse=True):
|
||||
if v <= self.config['api_version']:
|
||||
compatible_version = v
|
||||
break
|
||||
if compatible_version:
|
||||
log.warning('Configured api_version %s not supported; using %s',
|
||||
self.config['api_version'], compatible_version)
|
||||
self.config['api_version'] = compatible_version
|
||||
self._api_versions = BROKER_API_VERSIONS[compatible_version]
|
||||
else:
|
||||
raise Errors.UnrecognizedBrokerVersion(self.config['api_version'])
|
||||
check_timeout = self.config['api_version_auto_timeout_ms'] / 1000
|
||||
self.config['api_version'] = self.check_version(timeout=check_timeout)
|
||||
|
||||
def _init_wakeup_socketpair(self):
|
||||
self._wake_r, self._wake_w = socket.socketpair()
|
||||
self._wake_r.setblocking(False)
|
||||
self._wake_w.settimeout(self.config['wakeup_timeout_ms'] / 1000.0)
|
||||
self._waking = False
|
||||
self._selector.register(self._wake_r, selectors.EVENT_READ)
|
||||
def _can_bootstrap(self):
|
||||
effective_failures = self._bootstrap_fails // self._num_bootstrap_hosts
|
||||
backoff_factor = 2 ** effective_failures
|
||||
backoff_ms = min(self.config['reconnect_backoff_ms'] * backoff_factor,
|
||||
self.config['reconnect_backoff_max_ms'])
|
||||
|
||||
def _close_wakeup_socketpair(self):
|
||||
if self._wake_r is not None:
|
||||
try:
|
||||
self._selector.unregister(self._wake_r)
|
||||
except (KeyError, ValueError, TypeError):
|
||||
pass
|
||||
self._wake_r.close()
|
||||
if self._wake_w is not None:
|
||||
self._wake_w.close()
|
||||
self._wake_r = None
|
||||
self._wake_w = None
|
||||
backoff_ms *= random.uniform(0.8, 1.2)
|
||||
|
||||
next_at = self._last_bootstrap + backoff_ms / 1000.0
|
||||
now = time.time()
|
||||
if next_at > now:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _can_connect(self, node_id):
|
||||
if node_id not in self._conns:
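
To make the _can_bootstrap backoff above concrete: with the reconnect_backoff_ms default of 50 from this diff, the delay doubles per effective failure until it reaches reconnect_backoff_max_ms (1000 on one side of this change, 30000 on the other), and a 0.8-1.2x jitter is then applied. A rough worked example, before jitter:

reconnect_backoff_ms = 50
reconnect_backoff_max_ms = 1000    # the other side of this diff uses 30000

for effective_failures in range(6):
    backoff_ms = min(reconnect_backoff_ms * 2 ** effective_failures,
                     reconnect_backoff_max_ms)
    print(effective_failures, backoff_ms)
# 0 -> 50, 1 -> 100, 2 -> 200, 3 -> 400, 4 -> 800, 5 -> 1000
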
@@ -310,7 +267,7 @@ class KafkaClient(object):
|
||||
|
||||
def _conn_state_change(self, node_id, sock, conn):
|
||||
with self._lock:
|
||||
if conn.state is ConnectionStates.CONNECTING:
|
||||
if conn.connecting():
|
||||
# SSL connections can enter this state 2x (second during Handshake)
|
||||
if node_id not in self._connecting:
|
||||
self._connecting.add(node_id)
|
||||
@@ -322,19 +279,7 @@ class KafkaClient(object):
|
||||
if self.cluster.is_bootstrap(node_id):
|
||||
self._last_bootstrap = time.time()
|
||||
|
||||
elif conn.state is ConnectionStates.API_VERSIONS_SEND:
|
||||
try:
|
||||
self._selector.register(sock, selectors.EVENT_WRITE, conn)
|
||||
except KeyError:
|
||||
self._selector.modify(sock, selectors.EVENT_WRITE, conn)
|
||||
|
||||
elif conn.state in (ConnectionStates.API_VERSIONS_RECV, ConnectionStates.AUTHENTICATING):
|
||||
try:
|
||||
self._selector.register(sock, selectors.EVENT_READ, conn)
|
||||
except KeyError:
|
||||
self._selector.modify(sock, selectors.EVENT_READ, conn)
|
||||
|
||||
elif conn.state is ConnectionStates.CONNECTED:
|
||||
elif conn.connected():
|
||||
log.debug("Node %s connected", node_id)
|
||||
if node_id in self._connecting:
|
||||
self._connecting.remove(node_id)
|
||||
@@ -351,8 +296,6 @@ class KafkaClient(object):
|
||||
|
||||
if self.cluster.is_bootstrap(node_id):
|
||||
self._bootstrap_fails = 0
|
||||
if self._api_versions is None:
|
||||
self._api_versions = conn._api_versions
|
||||
|
||||
else:
|
||||
for node_id in list(self._conns.keys()):
|
||||
@@ -365,7 +308,7 @@ class KafkaClient(object):
|
||||
self._connecting.remove(node_id)
|
||||
try:
|
||||
self._selector.unregister(sock)
|
||||
except (KeyError, ValueError):
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
if self._sensors:
|
||||
@@ -384,7 +327,7 @@ class KafkaClient(object):
|
||||
elif self.cluster.is_bootstrap(node_id):
|
||||
self._bootstrap_fails += 1
|
||||
|
||||
elif conn.connect_failed() and not self._closed and not idle_disconnect:
|
||||
elif self._refresh_on_disconnects and not self._closed and not idle_disconnect:
|
||||
log.warning("Node %s connection failed -- refreshing metadata", node_id)
|
||||
self.cluster.request_update()
|
||||
|
||||
@@ -400,11 +343,6 @@ class KafkaClient(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
def connection_failed(self, node_id):
|
||||
if node_id not in self._conns:
|
||||
return False
|
||||
return self._conns[node_id].connect_failed()
|
||||
|
||||
def _should_recycle_connection(self, conn):
|
||||
# Never recycle unless disconnected
|
||||
if not conn.disconnected():
|
||||
@@ -415,7 +353,7 @@ class KafkaClient(object):
|
||||
if broker is None:
|
||||
return False
|
||||
|
||||
host, _, _ = get_ip_port_afi(broker.host)
|
||||
host, _, afi = get_ip_port_afi(broker.host)
|
||||
if conn.host != host or conn.port != broker.port:
|
||||
log.info("Broker metadata change detected for node %s"
|
||||
" from %s:%s to %s:%s", conn.node_id, conn.host, conn.port,
|
||||
@@ -424,24 +362,14 @@ class KafkaClient(object):
|
||||
|
||||
return False
|
||||
|
||||
def _init_connect(self, node_id):
|
||||
"""Idempotent non-blocking connection attempt to the given node id.
|
||||
|
||||
Returns True if connection object exists and is connected / connecting
|
||||
"""
|
||||
def _maybe_connect(self, node_id):
|
||||
"""Idempotent non-blocking connection attempt to the given node id."""
|
||||
with self._lock:
|
||||
conn = self._conns.get(node_id)
|
||||
|
||||
# Check if existing connection should be recreated because host/port changed
|
||||
if conn is not None and self._should_recycle_connection(conn):
|
||||
self._conns.pop(node_id).close()
|
||||
conn = None
|
||||
|
||||
if conn is None:
|
||||
broker = self.cluster.broker_metadata(node_id)
|
||||
if broker is None:
|
||||
log.debug('Broker id %s not in current metadata', node_id)
|
||||
return False
|
||||
assert broker, 'Broker id %s not in current metadata' % (node_id,)
|
||||
|
||||
log.debug("Initiating connection to node %s at %s:%s",
|
||||
node_id, broker.host, broker.port)
|
||||
@@ -453,9 +381,16 @@ class KafkaClient(object):
|
||||
**self.config)
|
||||
self._conns[node_id] = conn
|
||||
|
||||
if conn.disconnected():
|
||||
conn.connect()
|
||||
return not conn.disconnected()
|
||||
# Check if existing connection should be recreated because host/port changed
|
||||
elif self._should_recycle_connection(conn):
|
||||
self._conns.pop(node_id)
|
||||
return False
|
||||
|
||||
elif conn.connected():
|
||||
return True
|
||||
|
||||
conn.connect()
|
||||
return conn.connected()
|
||||
|
||||
def ready(self, node_id, metadata_priority=True):
|
||||
"""Check whether a node is connected and ok to send more requests.
|
||||
@@ -481,7 +416,8 @@ class KafkaClient(object):
|
||||
def _close(self):
|
||||
if not self._closed:
|
||||
self._closed = True
|
||||
self._close_wakeup_socketpair()
|
||||
self._wake_r.close()
|
||||
self._wake_w.close()
|
||||
self._selector.close()
|
||||
|
||||
def close(self, node_id=None):
|
||||
@@ -528,8 +464,9 @@ class KafkaClient(object):
|
||||
def connection_delay(self, node_id):
|
||||
"""
|
||||
Return the number of milliseconds to wait, based on the connection
|
||||
state, before attempting to send data. When connecting or disconnected,
|
||||
this respects the reconnect backoff time. When connected, returns a very large
|
||||
state, before attempting to send data. When disconnected, this respects
|
||||
the reconnect backoff time. When connecting, returns 0 to allow
|
||||
non-blocking connect to finish. When connected, returns a very large
|
||||
number to handle slow/stalled connections.
|
||||
|
||||
Arguments:
|
||||
@@ -543,16 +480,6 @@ class KafkaClient(object):
|
||||
return 0
|
||||
return conn.connection_delay()
|
||||
|
||||
def throttle_delay(self, node_id):
|
||||
"""
|
||||
Return the number of milliseconds to wait until a broker is no longer throttled.
|
||||
When disconnected / connecting, returns 0.
|
||||
"""
|
||||
conn = self._conns.get(node_id)
|
||||
if conn is None:
|
||||
return 0
|
||||
return conn.throttle_delay()
|
||||
|
||||
def is_ready(self, node_id, metadata_priority=True):
|
||||
"""Check whether a node is ready to send more requests.
|
||||
|
||||
@@ -585,7 +512,7 @@ class KafkaClient(object):
|
||||
return False
|
||||
return conn.connected() and conn.can_send_more()
|
||||
|
||||
def send(self, node_id, request, wakeup=True, request_timeout_ms=None):
|
||||
def send(self, node_id, request, wakeup=True):
|
||||
"""Send a request to a specific node. Bytes are placed on an
|
||||
internal per-connection send-queue. Actual network I/O will be
|
||||
triggered in a subsequent call to .poll()
|
||||
@@ -593,13 +520,7 @@ class KafkaClient(object):
|
||||
Arguments:
|
||||
node_id (int): destination node
|
||||
request (Struct): request object (not-encoded)
|
||||
|
||||
Keyword Arguments:
|
||||
wakeup (bool, optional): optional flag to disable thread-wakeup.
|
||||
request_timeout_ms (int, optional): Provide custom timeout in milliseconds.
|
||||
If response is not processed before timeout, client will fail the
|
||||
request and close the connection.
|
||||
Default: None (uses value from client configuration)
|
||||
wakeup (bool): optional flag to disable thread-wakeup
|
||||
|
||||
Raises:
|
||||
AssertionError: if node_id is not in current cluster metadata
|
||||
@@ -615,9 +536,8 @@ class KafkaClient(object):
|
||||
# conn.send will queue the request internally
|
||||
# we will need to call send_pending_requests()
|
||||
# to trigger network I/O
|
||||
future = conn.send(request, blocking=False, request_timeout_ms=request_timeout_ms)
|
||||
if not future.is_done:
|
||||
self._sending.add(conn)
|
||||
future = conn.send(request, blocking=False)
|
||||
self._sending.add(conn)
|
||||
|
||||
# Wakeup signal is useful in case another thread is
|
||||
# blocked waiting for incoming network traffic while holding
|
||||
@@ -643,9 +563,12 @@ class KafkaClient(object):
|
||||
Returns:
|
||||
list: responses received (can be empty)
|
||||
"""
|
||||
if not isinstance(timeout_ms, (int, float, type(None))):
|
||||
if future is not None:
|
||||
timeout_ms = 100
|
||||
elif timeout_ms is None:
|
||||
timeout_ms = self.config['request_timeout_ms']
|
||||
elif not isinstance(timeout_ms, (int, float)):
|
||||
raise TypeError('Invalid type for timeout: %s' % type(timeout_ms))
|
||||
timer = Timer(timeout_ms)
|
||||
|
||||
# Loop for futures, break after first loop if None
|
||||
responses = []
|
||||
@@ -656,30 +579,24 @@ class KafkaClient(object):
|
||||
|
||||
# Attempt to complete pending connections
|
||||
for node_id in list(self._connecting):
|
||||
# False return means no more connection progress is possible
|
||||
# Connected nodes will update _connecting via state_change callback
|
||||
if not self._init_connect(node_id):
|
||||
# It's possible that the connection attempt triggered a state change
|
||||
# but if not, make sure to remove from _connecting list
|
||||
if node_id in self._connecting:
|
||||
self._connecting.remove(node_id)
|
||||
self._maybe_connect(node_id)
|
||||
|
||||
# Send a metadata request if needed (or initiate new connection)
|
||||
# Send a metadata request if needed
|
||||
metadata_timeout_ms = self._maybe_refresh_metadata()
|
||||
|
||||
# If we got a future that is already done, don't block in _poll
|
||||
if future is not None and future.is_done:
|
||||
timeout = 0
|
||||
else:
|
||||
user_timeout_ms = timer.timeout_ms if timeout_ms is not None else self.config['request_timeout_ms']
|
||||
idle_connection_timeout_ms = self._idle_expiry_manager.next_check_ms()
|
||||
request_timeout_ms = self._next_ifr_request_timeout_ms()
|
||||
log.debug("Timeouts: user %f, metadata %f, idle connection %f, request %f", user_timeout_ms, metadata_timeout_ms, idle_connection_timeout_ms, request_timeout_ms)
|
||||
timeout = min(
|
||||
user_timeout_ms,
|
||||
timeout_ms,
|
||||
metadata_timeout_ms,
|
||||
idle_connection_timeout_ms,
|
||||
request_timeout_ms)
|
||||
self.config['request_timeout_ms'])
|
||||
# if there are no requests in flight, do not block longer than the retry backoff
|
||||
if self.in_flight_request_count() == 0:
|
||||
timeout = min(timeout, self.config['retry_backoff_ms'])
|
||||
timeout = max(0, timeout) # avoid negative timeouts
|
||||
|
||||
self._poll(timeout / 1000)
|
||||
@@ -690,11 +607,7 @@ class KafkaClient(object):
|
||||
|
||||
# If all we had was a timeout (future is None) - only do one poll
|
||||
# If we do have a future, we keep looping until it is done
|
||||
if future is None:
|
||||
break
|
||||
elif future.is_done:
|
||||
break
|
||||
elif timeout_ms is not None and timer.expired:
|
||||
if future is None or future.is_done:
|
||||
break
|
||||
|
||||
return responses
|
||||
@@ -702,8 +615,6 @@ class KafkaClient(object):
|
||||
def _register_send_sockets(self):
|
||||
while self._sending:
|
||||
conn = self._sending.pop()
|
||||
if conn._sock is None:
|
||||
continue
|
||||
try:
|
||||
key = self._selector.get_key(conn._sock)
|
||||
events = key.events | selectors.EVENT_WRITE
|
||||
@@ -712,11 +623,6 @@ class KafkaClient(object):
|
||||
self._selector.register(conn._sock, selectors.EVENT_WRITE, conn)
|
||||
|
||||
def _poll(self, timeout):
|
||||
# Python throws OverflowError if timeout is > 2147483647 milliseconds
|
||||
# (though the param to selector.select is in seconds)
|
||||
# so convert any too-large timeout to blocking
|
||||
if timeout > 2147483:
|
||||
timeout = None
|
||||
# This needs to be locked, but since it is only called from within the
|
||||
# locked section of poll(), there is no additional lock acquisition here
|
||||
processed = set()
|
||||
@@ -789,13 +695,11 @@ class KafkaClient(object):
|
||||
|
||||
for conn in six.itervalues(self._conns):
|
||||
if conn.requests_timed_out():
|
||||
timed_out = conn.timed_out_ifrs()
|
||||
timeout_ms = (timed_out[0][2] - timed_out[0][1]) * 1000
|
||||
log.warning('%s timed out after %s ms. Closing connection.',
|
||||
conn, timeout_ms)
|
||||
conn, conn.config['request_timeout_ms'])
|
||||
conn.close(error=Errors.RequestTimedOutError(
|
||||
'Request timed out after %s ms' %
|
||||
timeout_ms))
|
||||
conn.config['request_timeout_ms']))
|
||||
|
||||
if self._sensors:
|
||||
self._sensors.io_time.record((time.time() - end_select) * 1000000000)
|
||||
@@ -833,17 +737,16 @@ class KafkaClient(object):
|
||||
break
|
||||
future.success(response)
|
||||
responses.append(response)
|
||||
|
||||
return responses
|
||||
|
||||
def least_loaded_node(self):
|
||||
"""Choose the node with fewest outstanding requests, with fallbacks.
|
||||
|
||||
This method will prefer a node with an existing connection (not throttled)
|
||||
with no in-flight-requests. If no such node is found, a node will be chosen
|
||||
randomly from all nodes that are not throttled or "blacked out" (i.e.,
|
||||
This method will prefer a node with an existing connection and no
|
||||
in-flight-requests. If no such node is found, a node will be chosen
|
||||
randomly from disconnected nodes that are not "blacked out" (i.e.,
|
||||
are not subject to a reconnect backoff). If no node metadata has been
|
||||
obtained, will return a bootstrap node.
|
||||
obtained, will return a bootstrap node (subject to exponential backoff).
|
||||
|
||||
Returns:
|
||||
node_id or None if no suitable node was found
|
||||
@@ -855,11 +758,11 @@ class KafkaClient(object):
|
||||
found = None
|
||||
for node_id in nodes:
|
||||
conn = self._conns.get(node_id)
|
||||
connected = conn is not None and conn.connected() and conn.can_send_more()
|
||||
blacked_out = conn is not None and (conn.blacked_out() or conn.throttled())
|
||||
connected = conn is not None and conn.connected()
|
||||
blacked_out = conn is not None and conn.blacked_out()
|
||||
curr_inflight = len(conn.in_flight_requests) if conn is not None else 0
|
||||
if connected and curr_inflight == 0:
|
||||
# if we find an established connection (not throttled)
|
||||
# if we find an established connection
|
||||
# with no in-flight requests, we can stop right away
|
||||
return node_id
|
||||
elif not blacked_out and curr_inflight < inflight:
|
||||
@@ -869,24 +772,6 @@ class KafkaClient(object):
|
||||
|
||||
return found
|
||||
|
||||
def _refresh_delay_ms(self, node_id):
|
||||
conn = self._conns.get(node_id)
|
||||
if conn is not None and conn.connected():
|
||||
return self.throttle_delay(node_id)
|
||||
else:
|
||||
return self.connection_delay(node_id)
|
||||
|
||||
def least_loaded_node_refresh_ms(self):
|
||||
"""Return connection or throttle delay in milliseconds for next available node.
|
||||
|
||||
This method is used primarily for retry/backoff during metadata refresh
|
||||
during / after a cluster outage, in which there are no available nodes.
|
||||
|
||||
Returns:
|
||||
float: delay_ms
|
||||
"""
|
||||
return min([self._refresh_delay_ms(broker.nodeId) for broker in self.cluster.brokers()])
|
||||
|
||||
def set_topics(self, topics):
|
||||
"""Set specific topics to track for metadata.
|
||||
|
||||
@@ -911,31 +796,19 @@ class KafkaClient(object):
|
||||
|
||||
Returns:
|
||||
Future: resolves after metadata request/response
|
||||
|
||||
Raises:
|
||||
TypeError: if topic is not a string
|
||||
ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
|
||||
"""
|
||||
ensure_valid_topic_name(topic)
|
||||
|
||||
if topic in self._topics:
|
||||
return Future().success(set(self._topics))
|
||||
|
||||
self._topics.add(topic)
|
||||
return self.cluster.request_update()
|
||||
|
||||
def _next_ifr_request_timeout_ms(self):
|
||||
if self._conns:
|
||||
return min([conn.next_ifr_request_timeout_ms() for conn in six.itervalues(self._conns)])
|
||||
else:
|
||||
return float('inf')
|
||||
|
||||
# This method should be locked when running multi-threaded
|
||||
def _maybe_refresh_metadata(self, wakeup=False):
|
||||
"""Send a metadata request if needed.
|
||||
|
||||
Returns:
|
||||
float: milliseconds until next refresh
|
||||
int: milliseconds until next refresh
|
||||
"""
|
||||
ttl = self.cluster.ttl()
|
||||
wait_for_in_progress_ms = self.config['request_timeout_ms'] if self._metadata_refresh_in_progress else 0
|
||||
@@ -949,44 +822,18 @@ class KafkaClient(object):
|
||||
# least_loaded_node()
|
||||
node_id = self.least_loaded_node()
|
||||
if node_id is None:
|
||||
next_connect_ms = self.least_loaded_node_refresh_ms()
|
||||
log.debug("Give up sending metadata request since no node is available. (reconnect delay %d ms)", next_connect_ms)
|
||||
return next_connect_ms
|
||||
log.debug("Give up sending metadata request since no node is available");
|
||||
return self.config['reconnect_backoff_ms']
|
||||
|
||||
if not self._can_send_request(node_id):
|
||||
# If there's any connection establishment underway, wait until it completes. This prevents
|
||||
# the client from unnecessarily connecting to additional nodes while a previous connection
|
||||
# attempt has not been completed.
|
||||
if self._connecting:
|
||||
return float('inf')
|
||||
|
||||
elif self._can_connect(node_id):
|
||||
log.debug("Initializing connection to node %s for metadata request", node_id)
|
||||
self._connecting.add(node_id)
|
||||
if not self._init_connect(node_id):
|
||||
if node_id in self._connecting:
|
||||
self._connecting.remove(node_id)
|
||||
# Connection attempt failed immediately, need to retry with a different node
|
||||
return self.config['reconnect_backoff_ms']
|
||||
else:
|
||||
# Existing connection throttled or max in flight requests.
|
||||
return self.throttle_delay(node_id) or self.config['request_timeout_ms']
|
||||
|
||||
# Recheck node_id in case we were able to connect immediately above
|
||||
if self._can_send_request(node_id):
|
||||
topics = list(self._topics)
|
||||
if not topics and self.cluster.is_bootstrap(node_id):
|
||||
topics = list(self.config['bootstrap_topics_filter'])
|
||||
|
||||
api_version = self.api_version(MetadataRequest, max_version=7)
|
||||
if self.cluster.need_all_topic_metadata:
|
||||
topics = MetadataRequest[api_version].ALL_TOPICS
|
||||
elif not topics:
|
||||
topics = MetadataRequest[api_version].NO_TOPICS
|
||||
if api_version >= 4:
|
||||
request = MetadataRequest[api_version](topics, self.config['allow_auto_create_topics'])
|
||||
else:
|
||||
request = MetadataRequest[api_version](topics)
|
||||
if self.cluster.need_all_topic_metadata or not topics:
|
||||
topics = [] if self.config['api_version'] < (0, 10) else None
|
||||
api_version = 0 if self.config['api_version'] < (0, 10) else 1
|
||||
request = MetadataRequest[api_version](topics)
|
||||
log.debug("Sending metadata request %s to node %s", request, node_id)
|
||||
future = self.send(node_id, request, wakeup=wakeup)
|
||||
future.add_callback(self.cluster.update_metadata)
|
||||
@@ -999,146 +846,103 @@ class KafkaClient(object):
|
||||
future.add_errback(refresh_done)
|
||||
return self.config['request_timeout_ms']
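Editor's note: both the old and new request-building paths above reduce to the same question: which topic list goes into the MetadataRequest. A small sketch of that decision, with placeholder sentinels standing in for the protocol-level ALL_TOPICS / NO_TOPICS constants referenced in the diff (these are not the real wire encodings):

ALL_TOPICS = None   # placeholder sentinel: "give me every topic"
NO_TOPICS = []      # placeholder sentinel: "just broker/cluster metadata"

def build_metadata_topics(subscribed, need_all_topics):
    """Pick the topic list for a metadata request.

    `subscribed` is the set of topics the client tracks;
    `need_all_topics` mirrors cluster.need_all_topic_metadata.
    """
    if need_all_topics:
        return ALL_TOPICS
    if not subscribed:
        return NO_TOPICS
    return sorted(subscribed)

print(build_metadata_topics({'orders', 'clicks'}, False))  # ['clicks', 'orders']
print(build_metadata_topics(set(), True))                  # None  (all topics)
print(build_metadata_topics(set(), False))                 # []    (no topics)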
|
||||
|
||||
# Should only get here if still connecting
|
||||
# If there's any connection establishment underway, wait until it completes. This prevents
|
||||
# the client from unnecessarily connecting to additional nodes while a previous connection
|
||||
# attempt has not been completed.
|
||||
if self._connecting:
|
||||
return float('inf')
|
||||
else:
|
||||
return self.config['reconnect_backoff_ms']
|
||||
|
||||
if self.maybe_connect(node_id, wakeup=wakeup):
|
||||
log.debug("Initializing connection to node %s for metadata request", node_id)
|
||||
return self.config['reconnect_backoff_ms']
|
||||
|
||||
# connected but can't send more, OR connecting
|
||||
# In either case we just need to wait for a network event
|
||||
# to let us know the selected connection might be usable again.
|
||||
return float('inf')
|
||||
|
||||
def get_api_versions(self):
|
||||
"""Return the ApiVersions map, if available.
|
||||
|
||||
Note: Only available after bootstrap; requires broker version 0.10.0 or later.
|
||||
Note: A call to check_version must previously have succeeded and returned
|
||||
version 0.10.0 or later
|
||||
|
||||
Returns: a map of dict mapping {api_key : (min_version, max_version)},
|
||||
or None if ApiVersion is not supported by the kafka cluster.
|
||||
"""
|
||||
return self._api_versions
|
||||
|
||||
def check_version(self, node_id=None, timeout=None, **kwargs):
|
||||
def check_version(self, node_id=None, timeout=2, strict=False):
|
||||
"""Attempt to guess the version of a Kafka broker.
|
||||
|
||||
Keyword Arguments:
|
||||
node_id (str, optional): Broker node id from cluster metadata. If None, attempts
|
||||
to connect to any available broker until version is identified.
|
||||
Default: None
|
||||
timeout (num, optional): Maximum time in seconds to try to check broker version.
|
||||
If unable to identify version before timeout, raise error (see below).
|
||||
Default: api_version_auto_timeout_ms / 1000
|
||||
Note: It is possible that this method blocks longer than the
|
||||
specified timeout. This can happen if the entire cluster
|
||||
is down and the client enters a bootstrap backoff sleep.
|
||||
This is only possible if node_id is None.
|
||||
|
||||
Returns: version tuple, i.e. (3, 9), (2, 0), (0, 10, 2) etc
|
||||
Returns: version tuple, i.e. (0, 10), (0, 9), (0, 8, 2), ...
|
||||
|
||||
Raises:
|
||||
NodeNotReadyError (if node_id is provided)
|
||||
NoBrokersAvailable (if node_id is None)
|
||||
UnrecognizedBrokerVersion: please file bug if seen!
|
||||
AssertionError (if strict=True): please file bug if seen!
|
||||
"""
|
||||
timeout = timeout or (self.config['api_version_auto_timeout_ms'] / 1000)
|
||||
with self._lock:
|
||||
end = time.time() + timeout
|
||||
while time.time() < end:
|
||||
time_remaining = max(end - time.time(), 0)
|
||||
if node_id is not None and self.connection_delay(node_id) > 0:
|
||||
sleep_time = min(time_remaining, self.connection_delay(node_id) / 1000.0)
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
continue
|
||||
try_node = node_id or self.least_loaded_node()
|
||||
if try_node is None:
|
||||
sleep_time = min(time_remaining, self.least_loaded_node_refresh_ms() / 1000.0)
|
||||
if sleep_time > 0:
|
||||
log.warning('No node available during check_version; sleeping %.2f secs', sleep_time)
|
||||
time.sleep(sleep_time)
|
||||
continue
|
||||
log.debug('Attempting to check version with node %s', try_node)
|
||||
if not self._init_connect(try_node):
|
||||
if try_node == node_id:
|
||||
raise Errors.NodeNotReadyError("Connection failed to %s" % node_id)
|
||||
else:
|
||||
continue
|
||||
conn = self._conns[try_node]
|
||||
self._lock.acquire()
|
||||
end = time.time() + timeout
|
||||
while time.time() < end:
|
||||
|
||||
while conn.connecting() and time.time() < end:
|
||||
timeout_ms = min((end - time.time()) * 1000, 200)
|
||||
self.poll(timeout_ms=timeout_ms)
|
||||
# It is possible that least_loaded_node falls back to bootstrap,
|
||||
# which can block for an increasing backoff period
|
||||
try_node = node_id or self.least_loaded_node()
|
||||
if try_node is None:
|
||||
self._lock.release()
|
||||
raise Errors.NoBrokersAvailable()
|
||||
self._maybe_connect(try_node)
|
||||
conn = self._conns[try_node]
|
||||
|
||||
if conn._api_version is not None:
|
||||
return conn._api_version
|
||||
else:
|
||||
log.debug('Failed to identify api_version after connection attempt to %s', conn)
|
||||
|
||||
# Timeout
|
||||
else:
|
||||
# We will intentionally cause socket failures
|
||||
# These should not trigger metadata refresh
|
||||
self._refresh_on_disconnects = False
|
||||
try:
|
||||
remaining = end - time.time()
|
||||
version = conn.check_version(timeout=remaining, strict=strict, topics=list(self.config['bootstrap_topics_filter']))
|
||||
if version >= (0, 10, 0):
|
||||
# cache the api versions map if it's available (starting
|
||||
# in 0.10 cluster version)
|
||||
self._api_versions = conn.get_api_versions()
|
||||
self._lock.release()
|
||||
return version
|
||||
except Errors.NodeNotReadyError:
|
||||
# Only raise to user if this is a node-specific request
|
||||
if node_id is not None:
|
||||
raise Errors.NodeNotReadyError(node_id)
|
||||
else:
|
||||
raise Errors.NoBrokersAvailable()
|
||||
self._lock.release()
|
||||
raise
|
||||
finally:
|
||||
self._refresh_on_disconnects = True
|
||||
|
||||
def api_version(self, operation, max_version=None):
"""Find the latest version of the protocol operation supported by both
this library and the broker.

This resolves to the lesser of either the latest api version this
library supports, or the max version supported by the broker.

Arguments:
operation: A list of protocol operation versions from kafka.protocol.

Keyword Arguments:
max_version (int, optional): Provide an alternate maximum api version
to reflect limitations in user code.

Returns:
int: The highest api version number compatible between client and broker.

Raises: IncompatibleBrokerVersion if no matching version is found
"""
# Cap max_version at the largest available version in operation list
max_version = min(len(operation) - 1, max_version if max_version is not None else float('inf'))
broker_api_versions = self._api_versions
api_key = operation[0].API_KEY
if broker_api_versions is None or api_key not in broker_api_versions:
raise Errors.IncompatibleBrokerVersion(
"Kafka broker does not support the '{}' Kafka protocol."
.format(operation[0].__name__))
broker_min_version, broker_max_version = broker_api_versions[api_key]
version = min(max_version, broker_max_version)
if version < broker_min_version:
# max library version is less than min broker version. Currently,
# no Kafka versions specify a min msg version. Maybe in the future?
raise Errors.IncompatibleBrokerVersion(
"No version of the '{}' Kafka protocol is supported by both the client and broker."
.format(operation[0].__name__))
return version
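Editor's note: the negotiation above is a clamp: cap the requested version at what the library ships, take the minimum of that and the broker's advertised maximum, and fail if the result falls below the broker's minimum. A compact sketch of the same arithmetic with plain integers (the {api_key: (min, max)} shape follows the diff; the error type here is a stand-in):

def negotiate_version(library_max, broker_min, broker_max, requested_max=None):
    # Cap at what the caller asked for, if anything.
    capped = min(library_max, requested_max if requested_max is not None else float('inf'))
    version = min(capped, broker_max)
    if version < broker_min:
        raise ValueError('no mutually supported version')
    return int(version)

# Library supports v0..v7, broker advertises (min=1, max=5):
print(negotiate_version(library_max=7, broker_min=1, broker_max=5))   # 5
# Caller limits itself to v3:
print(negotiate_version(7, 1, 5, requested_max=3))                    # 3
# Broker requires at least v6 but caller is capped at v3 -> error:
try:
    negotiate_version(7, 6, 9, requested_max=3)
except ValueError as e:
    print(e)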
|
||||
# Timeout
|
||||
else:
|
||||
self._lock.release()
|
||||
raise Errors.NoBrokersAvailable()
|
||||
|
||||
def wakeup(self):
|
||||
if self._closed or self._waking or self._wake_w is None:
|
||||
return
|
||||
with self._wake_lock:
|
||||
try:
|
||||
self._wake_w.sendall(b'x')
|
||||
self._waking = True
|
||||
except socket.timeout as e:
|
||||
except socket.timeout:
|
||||
log.warning('Timeout to send to wakeup socket!')
|
||||
raise Errors.KafkaTimeoutError(e)
|
||||
except socket.error as e:
|
||||
log.warning('Unable to send to wakeup socket! %s', e)
|
||||
raise e
|
||||
raise Errors.KafkaTimeoutError()
|
||||
except socket.error:
|
||||
log.warning('Unable to send to wakeup socket!')
|
||||
|
||||
def _clear_wake_fd(self):
|
||||
# reading from wake socket should only happen in a single thread
|
||||
with self._wake_lock:
|
||||
self._waking = False
|
||||
while True:
|
||||
try:
|
||||
if not self._wake_r.recv(1024):
|
||||
# Non-blocking socket returns empty on error
|
||||
log.warning("Error reading wakeup socket. Rebuilding socketpair.")
|
||||
self._close_wakeup_socketpair()
|
||||
self._init_wakeup_socketpair()
|
||||
break
|
||||
except socket.error:
|
||||
# Non-blocking socket raises when socket is ok but no data available to read
|
||||
break
|
||||
while True:
|
||||
try:
|
||||
self._wake_r.recv(1024)
|
||||
except socket.error:
|
||||
break
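Editor's note: the wakeup machinery above relies on a local socketpair: one byte written to the write end makes a blocking select return, and _clear_wake_fd then drains the read end until it would block. A self-contained sketch of that drain loop (the rebuild-on-empty-read behaviour from the new code is noted in a comment but not implemented here):

import socket

def make_wakeup_pair():
    wake_r, wake_w = socket.socketpair()
    wake_r.setblocking(False)   # drain without blocking
    wake_w.settimeout(0.1)      # bounded, so a wakeup can't hang
    return wake_r, wake_w

def clear_wakeup(wake_r):
    # Read until the non-blocking socket reports "no data" (socket.error).
    while True:
        try:
            if not wake_r.recv(1024):
                break           # empty read: socket broken; caller may rebuild the pair
        except socket.error:
            break

wake_r, wake_w = make_wakeup_pair()
wake_w.sendall(b'x')            # wakeup signal
clear_wakeup(wake_r)            # drains the pending byte, then returns
print('drained')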
|
||||
|
||||
def _maybe_close_oldest_connection(self):
|
||||
expired_connection = self._idle_expiry_manager.poll_expired_connection()
|
||||
@@ -1158,39 +962,6 @@ class KafkaClient(object):
|
||||
else:
|
||||
return False
|
||||
|
||||
def await_ready(self, node_id, timeout_ms=30000):
|
||||
"""
|
||||
Invokes `poll` to discard pending disconnects, followed by `client.ready` and 0 or more `client.poll`
|
||||
invocations until the connection to `node` is ready, the timeoutMs expires or the connection fails.
|
||||
|
||||
It returns `true` if the call completes normally or `false` if the timeoutMs expires. If the connection fails,
|
||||
an `IOException` is thrown instead. Note that if the `NetworkClient` has been configured with a positive
|
||||
connection timeoutMs, it is possible for this method to raise an `IOException` for a previous connection which
|
||||
has recently disconnected.
|
||||
|
||||
This method is useful for implementing blocking behaviour on top of the non-blocking `NetworkClient`, use it with
|
||||
care.
|
||||
"""
|
||||
timer = Timer(timeout_ms)
|
||||
self.poll(timeout_ms=0)
|
||||
if self.is_ready(node_id):
|
||||
return True
|
||||
|
||||
while not self.is_ready(node_id) and not timer.expired:
|
||||
if self.connection_failed(node_id):
|
||||
raise Errors.KafkaConnectionError("Connection to %s failed." % (node_id,))
|
||||
self.maybe_connect(node_id)
|
||||
self.poll(timeout_ms=timer.timeout_ms)
|
||||
return self.is_ready(node_id)
|
||||
|
||||
def send_and_receive(self, node_id, request):
future = self.send(node_id, request)
self.poll(future=future)
assert future.is_done
if future.failed():
raise future.exception
return future.value
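Editor's note: send_and_receive above layers a blocking call on top of the future-based API: send, poll until the future resolves, then either raise or return. A tiny sketch of that future contract using a hypothetical in-memory future (not the kafka.future implementation):

class MiniFuture:
    """Hypothetical future with the same success/failed surface the diff relies on."""
    def __init__(self):
        self.is_done = False
        self.value = None
        self.exception = None

    def success(self, value):
        self.is_done, self.value = True, value

    def failure(self, exc):
        self.is_done, self.exception = True, exc

    def failed(self):
        return self.is_done and self.exception is not None

def send_and_receive(send, poll):
    future = send()
    while not future.is_done:   # stand-in for client.poll(future=future)
        poll(future)
    if future.failed():
        raise future.exception
    return future.value

f = MiniFuture()
print(send_and_receive(lambda: f, lambda fut: fut.success('MetadataResponse')))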
|
||||
|
||||
|
||||
# OrderedDict requires python2.7+
|
||||
try:
|
||||
@@ -1227,7 +998,7 @@ class IdleConnectionManager(object):
|
||||
|
||||
def next_check_ms(self):
|
||||
now = time.time()
|
||||
if not self.lru_connections or self.next_idle_close_check_time == float('inf'):
|
||||
if not self.lru_connections:
|
||||
return float('inf')
|
||||
elif self.next_idle_close_check_time <= now:
|
||||
return 0
|
||||
|
||||
@@ -3,15 +3,13 @@ from __future__ import absolute_import
|
||||
import collections
|
||||
import copy
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
from kafka import errors as Errors
|
||||
from kafka.conn import get_ip_port_afi
|
||||
from kafka.conn import collect_hosts
|
||||
from kafka.future import Future
|
||||
from kafka.structs import BrokerMetadata, PartitionMetadata, TopicPartition
|
||||
|
||||
@@ -23,7 +21,7 @@ class ClusterMetadata(object):
|
||||
A class to manage kafka cluster metadata.
|
||||
|
||||
This class does not perform any IO. It simply updates internal state
|
||||
given API responses (MetadataResponse, FindCoordinatorResponse).
|
||||
given API responses (MetadataResponse, GroupCoordinatorResponse).
|
||||
|
||||
Keyword Arguments:
|
||||
retry_backoff_ms (int): Milliseconds to backoff when retrying on
|
||||
@@ -49,7 +47,7 @@ class ClusterMetadata(object):
|
||||
self._brokers = {} # node_id -> BrokerMetadata
|
||||
self._partitions = {} # topic -> partition -> PartitionMetadata
|
||||
self._broker_partitions = collections.defaultdict(set) # node_id -> {TopicPartition...}
|
||||
self._coordinators = {} # (coord_type, coord_key) -> node_id
|
||||
self._groups = {} # group_name -> node_id
|
||||
self._last_refresh_ms = 0
|
||||
self._last_successful_refresh_ms = 0
|
||||
self._need_update = True
|
||||
@@ -60,7 +58,6 @@ class ClusterMetadata(object):
|
||||
self.unauthorized_topics = set()
|
||||
self.internal_topics = set()
|
||||
self.controller = None
|
||||
self.cluster_id = None
|
||||
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
for key in self.config:
|
||||
@@ -95,7 +92,7 @@ class ClusterMetadata(object):
|
||||
"""Get BrokerMetadata
|
||||
|
||||
Arguments:
|
||||
broker_id (int or str): node_id for a broker to check
|
||||
broker_id (int): node_id for a broker to check
|
||||
|
||||
Returns:
|
||||
BrokerMetadata or None if not found
|
||||
@@ -114,7 +111,6 @@ class ClusterMetadata(object):
|
||||
|
||||
Returns:
|
||||
set: {partition (int), ...}
|
||||
None if topic not found.
|
||||
"""
|
||||
if topic not in self._partitions:
|
||||
return None
|
||||
@@ -144,14 +140,11 @@ class ClusterMetadata(object):
|
||||
return None
|
||||
return self._partitions[partition.topic][partition.partition].leader
|
||||
|
||||
def leader_epoch_for_partition(self, partition):
|
||||
return self._partitions[partition.topic][partition.partition].leader_epoch
|
||||
|
||||
def partitions_for_broker(self, broker_id):
|
||||
"""Return TopicPartitions for which the broker is a leader.
|
||||
|
||||
Arguments:
|
||||
broker_id (int or str): node id for a broker
|
||||
broker_id (int): node id for a broker
|
||||
|
||||
Returns:
|
||||
set: {TopicPartition, ...}
|
||||
@@ -166,10 +159,10 @@ class ClusterMetadata(object):
|
||||
group (str): name of consumer group
|
||||
|
||||
Returns:
|
||||
node_id (int or str) for group coordinator, -1 if coordinator unknown
|
||||
int: node_id for group coordinator
|
||||
None if the group does not exist.
|
||||
"""
|
||||
return self._coordinators.get(('group', group))
|
||||
return self._groups.get(group)
|
||||
|
||||
def ttl(self):
|
||||
"""Milliseconds until metadata should be refreshed"""
|
||||
@@ -204,10 +197,6 @@ class ClusterMetadata(object):
|
||||
self._future = Future()
|
||||
return self._future
|
||||
|
||||
@property
|
||||
def need_update(self):
|
||||
return self._need_update
|
||||
|
||||
def topics(self, exclude_internal_topics=True):
|
||||
"""Get set of known topics.
|
||||
|
||||
@@ -245,6 +234,13 @@ class ClusterMetadata(object):
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
# In the common case where we ask for a single topic and get back an
|
||||
# error, we should fail the future
|
||||
if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
|
||||
error_code, topic = metadata.topics[0][:2]
|
||||
error = Errors.for_code(error_code)(topic)
|
||||
return self.failed_update(error)
|
||||
|
||||
if not metadata.brokers:
|
||||
log.warning("No broker metadata found in MetadataResponse -- ignoring.")
|
||||
return self.failed_update(Errors.MetadataEmptyBrokerList(metadata))
|
||||
@@ -265,11 +261,6 @@ class ClusterMetadata(object):
|
||||
else:
|
||||
_new_controller = _new_brokers.get(metadata.controller_id)
|
||||
|
||||
if metadata.API_VERSION < 2:
|
||||
_new_cluster_id = None
|
||||
else:
|
||||
_new_cluster_id = metadata.cluster_id
|
||||
|
||||
_new_partitions = {}
|
||||
_new_broker_partitions = collections.defaultdict(set)
|
||||
_new_unauthorized_topics = set()
|
||||
@@ -286,21 +277,10 @@ class ClusterMetadata(object):
|
||||
error_type = Errors.for_code(error_code)
|
||||
if error_type is Errors.NoError:
|
||||
_new_partitions[topic] = {}
|
||||
for partition_data in partitions:
|
||||
leader_epoch = -1
|
||||
offline_replicas = []
|
||||
if metadata.API_VERSION >= 7:
|
||||
p_error, partition, leader, leader_epoch, replicas, isr, offline_replicas = partition_data
|
||||
elif metadata.API_VERSION >= 5:
|
||||
p_error, partition, leader, replicas, isr, offline_replicas = partition_data
|
||||
else:
|
||||
p_error, partition, leader, replicas, isr = partition_data
|
||||
|
||||
for p_error, partition, leader, replicas, isr in partitions:
|
||||
_new_partitions[topic][partition] = PartitionMetadata(
|
||||
topic=topic, partition=partition,
|
||||
leader=leader, leader_epoch=leader_epoch,
|
||||
replicas=replicas, isr=isr, offline_replicas=offline_replicas,
|
||||
error=p_error)
|
||||
topic=topic, partition=partition, leader=leader,
|
||||
replicas=replicas, isr=isr, error=p_error)
|
||||
if leader != -1:
|
||||
_new_broker_partitions[leader].add(
|
||||
TopicPartition(topic, partition))
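Editor's note: the branches above normalise partition tuples whose shape depends on the MetadataResponse version: leader_epoch only appears from v7, offline_replicas from v5. A small sketch of the same normalisation over plain tuples (field order follows the diff; the sample data is made up):

def unpack_partition(api_version, partition_data):
    leader_epoch = -1
    offline_replicas = []
    if api_version >= 7:
        p_error, partition, leader, leader_epoch, replicas, isr, offline_replicas = partition_data
    elif api_version >= 5:
        p_error, partition, leader, replicas, isr, offline_replicas = partition_data
    else:
        p_error, partition, leader, replicas, isr = partition_data
    return {
        'error': p_error, 'partition': partition, 'leader': leader,
        'leader_epoch': leader_epoch, 'replicas': replicas,
        'isr': isr, 'offline': offline_replicas,
    }

# v1-style tuple (no epoch, no offline replicas) vs v7-style tuple:
print(unpack_partition(1, (0, 0, 1001, [1001, 1002], [1001])))
print(unpack_partition(7, (0, 0, 1001, 42, [1001, 1002], [1001], [])))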
|
||||
@@ -326,7 +306,6 @@ class ClusterMetadata(object):
|
||||
with self._lock:
|
||||
self._brokers = _new_brokers
|
||||
self.controller = _new_controller
|
||||
self.cluster_id = _new_cluster_id
|
||||
self._partitions = _new_partitions
|
||||
self._broker_partitions = _new_broker_partitions
|
||||
self.unauthorized_topics = _new_unauthorized_topics
|
||||
@@ -342,15 +321,7 @@ class ClusterMetadata(object):
|
||||
self._last_successful_refresh_ms = now
|
||||
|
||||
if f:
|
||||
# In the common case where we ask for a single topic and get back an
|
||||
# error, we should fail the future
|
||||
if len(metadata.topics) == 1 and metadata.topics[0][0] != Errors.NoError.errno:
|
||||
error_code, topic = metadata.topics[0][:2]
|
||||
error = Errors.for_code(error_code)(topic)
|
||||
f.failure(error)
|
||||
else:
|
||||
f.success(self)
|
||||
|
||||
f.success(self)
|
||||
log.debug("Updated cluster metadata to %s", self)
|
||||
|
||||
for listener in self._listeners:
|
||||
@@ -371,25 +342,24 @@ class ClusterMetadata(object):
|
||||
"""Remove a previously added listener callback"""
|
||||
self._listeners.remove(listener)
|
||||
|
||||
def add_coordinator(self, response, coord_type, coord_key):
|
||||
"""Update with metadata for a group or txn coordinator
|
||||
def add_group_coordinator(self, group, response):
|
||||
"""Update with metadata for a group coordinator
|
||||
|
||||
Arguments:
|
||||
response (FindCoordinatorResponse): broker response
|
||||
coord_type (str): 'group' or 'transaction'
|
||||
coord_key (str): consumer_group or transactional_id
|
||||
group (str): name of group from GroupCoordinatorRequest
|
||||
response (GroupCoordinatorResponse): broker response
|
||||
|
||||
Returns:
|
||||
string: coordinator node_id if metadata is updated, None on error
|
||||
"""
|
||||
log.debug("Updating coordinator for %s/%s: %s", coord_type, coord_key, response)
|
||||
log.debug("Updating coordinator for %s: %s", group, response)
|
||||
error_type = Errors.for_code(response.error_code)
|
||||
if error_type is not Errors.NoError:
|
||||
log.error("FindCoordinatorResponse error: %s", error_type)
|
||||
self._coordinators[(coord_type, coord_key)] = -1
|
||||
log.error("GroupCoordinatorResponse error: %s", error_type)
|
||||
self._groups[group] = -1
|
||||
return
|
||||
|
||||
# Use a coordinator-specific node id so that requests
|
||||
# Use a coordinator-specific node id so that group requests
|
||||
# get a dedicated connection
|
||||
node_id = 'coordinator-{}'.format(response.coordinator_id)
|
||||
coordinator = BrokerMetadata(
|
||||
@@ -398,9 +368,9 @@ class ClusterMetadata(object):
|
||||
response.port,
|
||||
None)
|
||||
|
||||
log.info("Coordinator for %s/%s is %s", coord_type, coord_key, coordinator)
|
||||
log.info("Group coordinator for %s is %s", group, coordinator)
|
||||
self._coordinator_brokers[node_id] = coordinator
|
||||
self._coordinators[(coord_type, coord_key)] = node_id
|
||||
self._groups[group] = node_id
|
||||
return node_id
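Editor's note: both variants above register the coordinator under a synthetic node id ('coordinator-<broker id>') so that coordinator traffic gets its own dedicated connection, and record -1 when the lookup failed. A compact sketch of that bookkeeping with a hypothetical response object (not the real protocol class):

import collections

FakeFindCoordinatorResponse = collections.namedtuple(
    'FakeFindCoordinatorResponse', ['error_code', 'coordinator_id', 'host', 'port'])

def register_coordinator(coordinators, coord_type, coord_key, response):
    """Store the coordinator mapping; -1 marks a failed lookup."""
    if response.error_code != 0:
        coordinators[(coord_type, coord_key)] = -1
        return None
    # Dedicated, synthetic node id so coordinator requests get their own connection.
    node_id = 'coordinator-{}'.format(response.coordinator_id)
    coordinators[(coord_type, coord_key)] = node_id
    return node_id

coordinators = {}
resp = FakeFindCoordinatorResponse(error_code=0, coordinator_id=7, host='broker-7', port=9092)
print(register_coordinator(coordinators, 'group', 'my-consumer-group', resp))  # coordinator-7
print(coordinators)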
|
||||
|
||||
def with_partitions(self, partitions_to_add):
|
||||
@@ -409,7 +379,7 @@ class ClusterMetadata(object):
|
||||
new_metadata._brokers = copy.deepcopy(self._brokers)
|
||||
new_metadata._partitions = copy.deepcopy(self._partitions)
|
||||
new_metadata._broker_partitions = copy.deepcopy(self._broker_partitions)
|
||||
new_metadata._coordinators = copy.deepcopy(self._coordinators)
|
||||
new_metadata._groups = copy.deepcopy(self._groups)
|
||||
new_metadata.internal_topics = copy.deepcopy(self.internal_topics)
|
||||
new_metadata.unauthorized_topics = copy.deepcopy(self.unauthorized_topics)
|
||||
|
||||
@@ -423,26 +393,5 @@ class ClusterMetadata(object):
|
||||
return new_metadata
|
||||
|
||||
def __str__(self):
|
||||
return 'ClusterMetadata(brokers: %d, topics: %d, coordinators: %d)' % \
|
||||
(len(self._brokers), len(self._partitions), len(self._coordinators))
|
||||
|
||||
|
||||
def collect_hosts(hosts, randomize=True):
"""
Collects a comma-separated set of hosts (host:port) and optionally
randomize the returned list.
"""

if isinstance(hosts, six.string_types):
hosts = hosts.strip().split(',')

result = []
for host_port in hosts:
# ignore leading SECURITY_PROTOCOL:// to mimic java client
host_port = re.sub('^.*://', '', host_port)
host, port, afi = get_ip_port_afi(host_port)
result.append((host, port, afi))

if randomize:
random.shuffle(result)
return result
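Editor's note: collect_hosts above leans on get_ip_port_afi from kafka.conn. A simplified, self-contained variant of the same parsing for illustration (no IPv6 handling; the default port of 9092 is an assumption of this sketch, not taken from the diff):

import random
import re

def collect_hosts_simple(hosts, randomize=True, default_port=9092):
    if isinstance(hosts, str):
        hosts = hosts.strip().split(',')
    result = []
    for host_port in hosts:
        host_port = host_port.strip()
        # Ignore a leading SECURITY_PROTOCOL:// prefix, as the diff does.
        host_port = re.sub(r'^.*://', '', host_port)
        host, _, port = host_port.partition(':')
        result.append((host, int(port) if port else default_port))
    if randomize:
        random.shuffle(result)
    return result

print(sorted(collect_hosts_simple('PLAINTEXT://k1:9092, k2:9093 ,k3')))
# [('k1', 9092), ('k2', 9093), ('k3', 9092)]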
|
||||
return 'ClusterMetadata(brokers: %d, topics: %d, groups: %d)' % \
|
||||
(len(self._brokers), len(self._partitions), len(self._groups))
|
||||
|
||||
@@ -187,21 +187,14 @@ def _detect_xerial_stream(payload):
|
||||
The version is the version of this format as written by xerial,
|
||||
in the wild this is currently 1 as such we only support v1.
|
||||
|
||||
Compat is there to claim the minimum supported version that
|
||||
Compat is there to claim the miniumum supported version that
|
||||
can read a xerial block stream, presently in the wild this is
|
||||
1.
|
||||
"""
|
||||
|
||||
if len(payload) > 16:
magic = struct.unpack('!' + _XERIAL_V1_FORMAT[:8], bytes(payload)[:8])
version, compat = struct.unpack('!' + _XERIAL_V1_FORMAT[8:], bytes(payload)[8:16])
# Until there is more than one way to do xerial blocking, the version + compat
# fields can be ignored. Also some producers (i.e., redpanda) are known to
# incorrectly encode these as little-endian, and that causes us to fail decoding
# when we otherwise would have succeeded.
# See https://github.com/dpkp/kafka-python/issues/2414
if magic == _XERIAL_V1_HEADER[:8]:
return True
header = struct.unpack('!' + _XERIAL_V1_FORMAT, bytes(payload)[:16])
return header == _XERIAL_V1_HEADER
return False
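Editor's note: the detection above inspects the 16-byte snappy-java ("xerial") framing header: an 8-byte magic string followed by version and compat fields that the newer code deliberately ignores, since some producers write them little-endian. A runnable sketch of the magic-only check; the exact header layout used here is my reading of the snappy-java framing and should be treated as an assumption:

import struct

# snappy-java ("xerial") stream header: 8 magic bytes, then two 32-bit fields
# (version, compat). Layout assumed for this sketch.
XERIAL_MAGIC = b'\x82SNAPPY\x00'

def looks_like_xerial(payload):
    if len(payload) <= 16:
        return False
    magic = bytes(payload)[:8]
    # Version/compat are ignored on purpose: some producers are known to
    # write them little-endian, so only the magic bytes are trusted.
    return magic == XERIAL_MAGIC

header = XERIAL_MAGIC + struct.pack('!ii', 1, 1)
print(looks_like_xerial(header + b'compressed-blocks...'))  # True
print(looks_like_xerial(b'\x00' * 32))                      # False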
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -5,7 +5,7 @@ import logging
|
||||
import socket
|
||||
import time
|
||||
|
||||
from kafka.errors import KafkaConfigurationError, KafkaTimeoutError, UnsupportedVersionError
|
||||
from kafka.errors import KafkaConfigurationError, UnsupportedVersionError
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
@@ -16,9 +16,8 @@ from kafka.coordinator.consumer import ConsumerCoordinator
|
||||
from kafka.coordinator.assignors.range import RangePartitionAssignor
|
||||
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
|
||||
from kafka.metrics import MetricConfig, Metrics
|
||||
from kafka.protocol.list_offsets import OffsetResetStrategy
|
||||
from kafka.structs import OffsetAndMetadata, TopicPartition
|
||||
from kafka.util import Timer
|
||||
from kafka.protocol.offset import OffsetResetStrategy
|
||||
from kafka.structs import TopicPartition
|
||||
from kafka.version import __version__
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -61,8 +60,6 @@ class KafkaConsumer(six.Iterator):
|
||||
raw message key and returns a deserialized key.
|
||||
value_deserializer (callable): Any callable that takes a
|
||||
raw message value and returns a deserialized value.
|
||||
enable_incremental_fetch_sessions: (bool): Use incremental fetch sessions
|
||||
when available / supported by kafka broker. See KIP-227. Default: True.
|
||||
fetch_min_bytes (int): Minimum amount of data the server should
|
||||
return for a fetch request, otherwise wait up to
|
||||
fetch_max_wait_ms for more data to accumulate. Default: 1.
|
||||
@@ -101,7 +98,7 @@ class KafkaConsumer(six.Iterator):
|
||||
reconnection attempts will continue periodically with this fixed
|
||||
rate. To avoid connection storms, a randomization factor of 0.2
|
||||
will be applied to the backoff resulting in a random range between
|
||||
20% below and 20% above the computed value. Default: 30000.
|
||||
20% below and 20% above the computed value. Default: 1000.
|
||||
max_in_flight_requests_per_connection (int): Requests are pipelined
|
||||
to kafka brokers up to this number of maximum requests per
|
||||
broker connection. Default: 5.
|
||||
@@ -121,12 +118,6 @@ class KafkaConsumer(six.Iterator):
|
||||
consumed. This ensures no on-the-wire or on-disk corruption to
|
||||
the messages occurred. This check adds some overhead, so it may
|
||||
be disabled in cases seeking extreme performance. Default: True
|
||||
isolation_level (str): Configure KIP-98 transactional consumer by
|
||||
setting to 'read_committed'. This will cause the consumer to
|
||||
skip records from aborted transactions. Default: 'read_uncommitted'
|
||||
allow_auto_create_topics (bool): Enable/disable auto topic creation
|
||||
on metadata request. Only available with api_version >= (0, 11).
|
||||
Default: True
|
||||
metadata_max_age_ms (int): The period of time in milliseconds after
|
||||
which we force a refresh of metadata, even if we haven't seen any
|
||||
partition leadership changes to proactively discover any new
|
||||
@@ -204,17 +195,10 @@ class KafkaConsumer(six.Iterator):
|
||||
or other configuration forbids use of all the specified ciphers),
|
||||
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
|
||||
api_version (tuple): Specify which Kafka API version to use. If set to
|
||||
None, the client will attempt to determine the broker version via
|
||||
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
|
||||
various known APIs. Dynamic version checking is performed eagerly
|
||||
during __init__ and can raise NoBrokersAvailableError if no connection
|
||||
was made before timeout (see api_version_auto_timeout_ms below).
|
||||
Different versions enable different functionality.
|
||||
None, the client will attempt to infer the broker version by probing
|
||||
various APIs. Different versions enable different functionality.
|
||||
|
||||
Examples:
|
||||
(3, 9) most recent broker release, enable all supported features
|
||||
(0, 11) enables message format v2 (internal)
|
||||
(0, 10, 0) enables sasl authentication and message format v1
|
||||
(0, 9) enables full group coordination features with automatic
|
||||
partition assignment and rebalancing,
|
||||
(0, 8, 2) enables kafka-storage offset commits with manual
|
||||
@@ -228,7 +212,6 @@ class KafkaConsumer(six.Iterator):
|
||||
api_version_auto_timeout_ms (int): number of milliseconds to throw a
|
||||
timeout exception from the constructor when checking the broker
|
||||
api version. Only applies if api_version set to None.
|
||||
Default: 2000
|
||||
connections_max_idle_ms: Close idle connections after the number of
|
||||
milliseconds specified by this config. The broker closes idle
|
||||
connections after connections.max.idle.ms, so this avoids hitting
|
||||
@@ -237,7 +220,6 @@ class KafkaConsumer(six.Iterator):
|
||||
metric_reporters (list): A list of classes to use as metrics reporters.
|
||||
Implementing the AbstractMetricsReporter interface allows plugging
|
||||
in classes that will be notified of new metric creation. Default: []
|
||||
metrics_enabled (bool): Whether to track metrics on this instance. Default True.
|
||||
metrics_num_samples (int): The number of samples maintained to compute
|
||||
metrics. Default: 2
|
||||
metrics_sample_window_ms (int): The maximum age in milliseconds of
|
||||
@@ -256,17 +238,12 @@ class KafkaConsumer(six.Iterator):
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
|
||||
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
|
||||
sasl_kerberos_domain name are ignored. Default: None.
|
||||
sasl_kerberos_service_name (str): Service name to include in GSSAPI
|
||||
sasl mechanism handshake. Default: 'kafka'
|
||||
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
|
||||
sasl mechanism handshake. Default: one of bootstrap servers
|
||||
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
|
||||
token provider instance. Default: None
|
||||
socks5_proxy (str): Socks5 proxy URL. Default: None
|
||||
kafka_client (callable): Custom class / callable for creating KafkaClient instances
|
||||
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
|
||||
instance. (See kafka.oauth.abstract). Default: None
|
||||
|
||||
Note:
|
||||
Configuration parameters are described in more detail at
|
||||
@@ -278,7 +255,6 @@ class KafkaConsumer(six.Iterator):
|
||||
'group_id': None,
|
||||
'key_deserializer': None,
|
||||
'value_deserializer': None,
|
||||
'enable_incremental_fetch_sessions': True,
|
||||
'fetch_max_wait_ms': 500,
|
||||
'fetch_min_bytes': 1,
|
||||
'fetch_max_bytes': 52428800,
|
||||
@@ -286,15 +262,13 @@ class KafkaConsumer(six.Iterator):
|
||||
'request_timeout_ms': 305000, # chosen to be higher than the default of max_poll_interval_ms
|
||||
'retry_backoff_ms': 100,
|
||||
'reconnect_backoff_ms': 50,
|
||||
'reconnect_backoff_max_ms': 30000,
|
||||
'reconnect_backoff_max_ms': 1000,
|
||||
'max_in_flight_requests_per_connection': 5,
|
||||
'auto_offset_reset': 'latest',
|
||||
'enable_auto_commit': True,
|
||||
'auto_commit_interval_ms': 5000,
|
||||
'default_offset_commit_callback': lambda offsets, response: True,
|
||||
'check_crcs': True,
|
||||
'isolation_level': 'read_uncommitted',
|
||||
'allow_auto_create_topics': True,
|
||||
'metadata_max_age_ms': 5 * 60 * 1000,
|
||||
'partition_assignment_strategy': (RangePartitionAssignor, RoundRobinPartitionAssignor),
|
||||
'max_poll_records': 500,
|
||||
@@ -320,7 +294,6 @@ class KafkaConsumer(six.Iterator):
|
||||
'api_version_auto_timeout_ms': 2000,
|
||||
'connections_max_idle_ms': 9 * 60 * 1000,
|
||||
'metric_reporters': [],
|
||||
'metrics_enabled': True,
|
||||
'metrics_num_samples': 2,
|
||||
'metrics_sample_window_ms': 30000,
|
||||
'metric_group_prefix': 'consumer',
|
||||
@@ -329,12 +302,10 @@ class KafkaConsumer(six.Iterator):
|
||||
'sasl_mechanism': None,
|
||||
'sasl_plain_username': None,
|
||||
'sasl_plain_password': None,
|
||||
'sasl_kerberos_name': None,
|
||||
'sasl_kerberos_service_name': 'kafka',
|
||||
'sasl_kerberos_domain_name': None,
|
||||
'sasl_oauth_token_provider': None,
|
||||
'socks5_proxy': None,
|
||||
'kafka_client': KafkaClient,
|
||||
'legacy_iterator': False, # enable to revert to < 1.4.7 iterator
|
||||
}
|
||||
DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000
|
||||
|
||||
@@ -364,15 +335,13 @@ class KafkaConsumer(six.Iterator):
|
||||
"fetch_max_wait_ms ({})."
|
||||
.format(connections_max_idle_ms, request_timeout_ms, fetch_max_wait_ms))
|
||||
|
||||
if self.config['metrics_enabled']:
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
else:
|
||||
self._metrics = None
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
# TODO _metrics likely needs to be passed to KafkaClient, etc.
|
||||
|
||||
# api_version was previously a str. Accept old format for now
|
||||
if isinstance(self.config['api_version'], str):
|
||||
@@ -384,10 +353,11 @@ class KafkaConsumer(six.Iterator):
|
||||
log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
|
||||
str(self.config['api_version']), str_version)
|
||||
|
||||
self._client = self.config['kafka_client'](metrics=self._metrics, **self.config)
|
||||
self._client = KafkaClient(metrics=self._metrics, **self.config)
|
||||
|
||||
# Get auto-discovered / normalized version from client
|
||||
self.config['api_version'] = self._client.config['api_version']
|
||||
# Get auto-discovered version from client if necessary
|
||||
if self.config['api_version'] is None:
|
||||
self.config['api_version'] = self._client.config['api_version']
|
||||
|
||||
# Coordinator configurations are different for older brokers
|
||||
# max_poll_interval_ms is not supported directly -- it must the be
|
||||
@@ -410,9 +380,9 @@ class KafkaConsumer(six.Iterator):
|
||||
|
||||
self._subscription = SubscriptionState(self.config['auto_offset_reset'])
|
||||
self._fetcher = Fetcher(
|
||||
self._client, self._subscription, metrics=self._metrics, **self.config)
|
||||
self._client, self._subscription, self._metrics, **self.config)
|
||||
self._coordinator = ConsumerCoordinator(
|
||||
self._client, self._subscription, metrics=self._metrics,
|
||||
self._client, self._subscription, self._metrics,
|
||||
assignors=self.config['partition_assignment_strategy'],
|
||||
**self.config)
|
||||
self._closed = False
|
||||
@@ -452,15 +422,8 @@ class KafkaConsumer(six.Iterator):
|
||||
no rebalance operation triggered when group membership or cluster
|
||||
and topic metadata change.
|
||||
"""
|
||||
if not partitions:
|
||||
self.unsubscribe()
|
||||
else:
|
||||
# make sure the offsets of topic partitions the consumer is unsubscribing from
|
||||
# are committed since there will be no following rebalance
|
||||
self._coordinator.maybe_auto_commit_offsets_now()
|
||||
self._subscription.assign_from_user(partitions)
|
||||
self._client.set_topics([tp.topic for tp in partitions])
|
||||
log.debug("Subscribed to partition(s): %s", partitions)
|
||||
self._subscription.assign_from_user(partitions)
|
||||
self._client.set_topics([tp.topic for tp in partitions])
|
||||
|
||||
def assignment(self):
|
||||
"""Get the TopicPartitions currently assigned to this consumer.
|
||||
@@ -478,23 +441,20 @@ class KafkaConsumer(six.Iterator):
|
||||
"""
|
||||
return self._subscription.assigned_partitions()
|
||||
|
||||
def close(self, autocommit=True, timeout_ms=None):
|
||||
def close(self, autocommit=True):
|
||||
"""Close the consumer, waiting indefinitely for any needed cleanup.
|
||||
|
||||
Keyword Arguments:
|
||||
autocommit (bool): If auto-commit is configured for this consumer,
|
||||
this optional flag causes the consumer to attempt to commit any
|
||||
pending consumed offsets prior to close. Default: True
|
||||
timeout_ms (num, optional): Milliseconds to wait for auto-commit.
|
||||
Default: None
|
||||
"""
|
||||
if self._closed:
|
||||
return
|
||||
log.debug("Closing the KafkaConsumer.")
|
||||
self._closed = True
|
||||
self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms)
|
||||
if self._metrics:
|
||||
self._metrics.close()
|
||||
self._coordinator.close(autocommit=autocommit)
|
||||
self._metrics.close()
|
||||
self._client.close()
|
||||
try:
|
||||
self.config['key_deserializer'].close()
|
||||
@@ -540,7 +500,7 @@ class KafkaConsumer(six.Iterator):
|
||||
offsets, callback=callback)
|
||||
return future
|
||||
|
||||
def commit(self, offsets=None, timeout_ms=None):
|
||||
def commit(self, offsets=None):
|
||||
"""Commit offsets to kafka, blocking until success or error.
|
||||
|
||||
This commits offsets only to Kafka. The offsets committed using this API
|
||||
@@ -564,16 +524,17 @@ class KafkaConsumer(six.Iterator):
|
||||
assert self.config['group_id'] is not None, 'Requires group_id'
|
||||
if offsets is None:
|
||||
offsets = self._subscription.all_consumed_offsets()
|
||||
self._coordinator.commit_offsets_sync(offsets, timeout_ms=timeout_ms)
|
||||
self._coordinator.commit_offsets_sync(offsets)
|
||||
|
||||
def committed(self, partition, metadata=False, timeout_ms=None):
|
||||
def committed(self, partition, metadata=False):
|
||||
"""Get the last committed offset for the given partition.
|
||||
|
||||
This offset will be used as the position for the consumer
|
||||
in the event of a failure.
|
||||
|
||||
This call will block to do a remote call to get the latest committed
|
||||
offsets from the server.
|
||||
This call may block to do a remote call if the partition in question
|
||||
isn't assigned to this consumer or if the consumer hasn't yet
|
||||
initialized its cache of committed offsets.
|
||||
|
||||
Arguments:
|
||||
partition (TopicPartition): The partition to check.
|
||||
@@ -582,19 +543,28 @@ class KafkaConsumer(six.Iterator):
|
||||
|
||||
Returns:
|
||||
The last committed offset (int or OffsetAndMetadata), or None if there was no prior commit.
|
||||
|
||||
Raises:
|
||||
KafkaTimeoutError if timeout_ms provided
|
||||
BrokerResponseErrors if OffsetFetchRequest raises an error.
|
||||
"""
|
||||
assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1'
|
||||
assert self.config['group_id'] is not None, 'Requires group_id'
|
||||
if not isinstance(partition, TopicPartition):
|
||||
raise TypeError('partition must be a TopicPartition namedtuple')
|
||||
committed = self._coordinator.fetch_committed_offsets([partition], timeout_ms=timeout_ms)
|
||||
if partition not in committed:
|
||||
return None
|
||||
return committed[partition] if metadata else committed[partition].offset
|
||||
if self._subscription.is_assigned(partition):
|
||||
committed = self._subscription.assignment[partition].committed
|
||||
if committed is None:
|
||||
self._coordinator.refresh_committed_offsets_if_needed()
|
||||
committed = self._subscription.assignment[partition].committed
|
||||
else:
|
||||
commit_map = self._coordinator.fetch_committed_offsets([partition])
|
||||
if partition in commit_map:
|
||||
committed = commit_map[partition]
|
||||
else:
|
||||
committed = None
|
||||
|
||||
if committed is not None:
|
||||
if metadata:
|
||||
return committed
|
||||
else:
|
||||
return committed.offset
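Editor's note: a short usage sketch for the committed() lookup shown above. This assumes a reachable broker at localhost:9092 and uses placeholder topic and group names; the metadata=True form is the one introduced in the newer code path:

from kafka import KafkaConsumer
from kafka.structs import TopicPartition

# Placeholders: bootstrap server, group id, and topic are assumptions of this sketch.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='demo-group')
tp = TopicPartition('demo-topic', 0)

offset = consumer.committed(tp)                    # plain int offset, or None if never committed
detailed = consumer.committed(tp, metadata=True)   # OffsetAndMetadata, or None
print(offset, detailed)
consumer.close()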
|
||||
|
||||
def _fetch_all_topic_metadata(self):
|
||||
"""A blocking call that fetches topic metadata for all topics in the
|
||||
@@ -639,7 +609,7 @@ class KafkaConsumer(six.Iterator):
|
||||
if partitions is None:
|
||||
self._fetch_all_topic_metadata()
|
||||
partitions = cluster.partitions_for_topic(topic)
|
||||
return partitions or set()
|
||||
return partitions
|
||||
|
||||
def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
|
||||
"""Fetch data from assigned topics / partitions.
|
||||
@@ -679,88 +649,82 @@ class KafkaConsumer(six.Iterator):
|
||||
assert not self._closed, 'KafkaConsumer is closed'
|
||||
|
||||
# Poll for new data until the timeout expires
|
||||
timer = Timer(timeout_ms)
|
||||
while not self._closed:
|
||||
records = self._poll_once(timer, max_records, update_offsets=update_offsets)
|
||||
start = time.time()
|
||||
remaining = timeout_ms
|
||||
while True:
|
||||
records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
|
||||
if records:
|
||||
return records
|
||||
elif timer.expired:
|
||||
break
|
||||
return {}
|
||||
|
||||
def _poll_once(self, timer, max_records, update_offsets=True):
|
||||
elapsed_ms = (time.time() - start) * 1000
|
||||
remaining = timeout_ms - elapsed_ms
|
||||
|
||||
if remaining <= 0:
|
||||
return {}
|
||||
|
||||
def _poll_once(self, timeout_ms, max_records, update_offsets=True):
|
||||
"""Do one round of polling. In addition to checking for new data, this does
|
||||
any needed heart-beating, auto-commits, and offset updates.
|
||||
|
||||
Arguments:
|
||||
timer (Timer): The maximum time in milliseconds to block.
|
||||
timeout_ms (int): The maximum time in milliseconds to block.
|
||||
|
||||
Returns:
|
||||
dict: Map of topic to list of records (may be empty).
|
||||
"""
|
||||
if not self._coordinator.poll(timeout_ms=timer.timeout_ms):
|
||||
log.debug('poll: timeout during coordinator.poll(); returning early')
|
||||
return {}
|
||||
self._coordinator.poll()
|
||||
|
||||
has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms)
|
||||
# Fetch positions if we have partitions we're subscribed to that we
|
||||
# don't know the offset for
|
||||
if not self._subscription.has_all_fetch_positions():
|
||||
self._update_fetch_positions(self._subscription.missing_fetch_positions())
|
||||
|
||||
# If data is available already, e.g. from a previous network client
|
||||
# poll() call to commit, then just return it immediately
|
||||
records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
|
||||
log.debug('poll: fetched records: %s, %s', records, partial)
|
||||
# Before returning the fetched records, we can send off the
|
||||
# next round of fetches and avoid block waiting for their
|
||||
# responses to enable pipelining while the user is handling the
|
||||
# fetched records.
|
||||
if not partial:
|
||||
log.debug("poll: Sending fetches")
|
||||
futures = self._fetcher.send_fetches()
|
||||
if len(futures):
|
||||
self._client.poll(timeout_ms=0)
|
||||
|
||||
if records:
|
||||
# Before returning the fetched records, we can send off the
|
||||
# next round of fetches and avoid block waiting for their
|
||||
# responses to enable pipelining while the user is handling the
|
||||
# fetched records.
|
||||
if not partial:
|
||||
futures = self._fetcher.send_fetches()
|
||||
if len(futures):
|
||||
self._client.poll(timeout_ms=0)
|
||||
return records
|
||||
|
||||
# We do not want to be stuck blocking in poll if we are missing some positions
|
||||
# since the offset lookup may be backing off after a failure
|
||||
poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000)
|
||||
if not has_all_fetch_positions:
|
||||
log.debug('poll: do not have all fetch positions...')
|
||||
poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])
|
||||
# Send any new fetches (won't resend pending fetches)
|
||||
futures = self._fetcher.send_fetches()
|
||||
if len(futures):
|
||||
self._client.poll(timeout_ms=0)
|
||||
|
||||
self._client.poll(timeout_ms=poll_timeout_ms)
|
||||
timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000)
|
||||
self._client.poll(timeout_ms=timeout_ms)
|
||||
# after the long poll, we should check whether the group needs to rebalance
|
||||
# prior to returning data so that the group can stabilize faster
|
||||
if self._coordinator.need_rejoin():
|
||||
log.debug('poll: coordinator needs rejoin; returning early')
|
||||
return {}
|
||||
|
||||
records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
|
||||
return records
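Editor's note: both versions of poll() above implement the same retry loop: call _poll_once, return as soon as records arrive, and stop once the time budget is spent. A self-contained sketch of that loop using a tiny millisecond timer (the timer class here is a stand-in, not kafka.util.Timer):

import time

class MsTimer:
    """Stand-in for a millisecond countdown timer."""
    def __init__(self, timeout_ms):
        self._deadline = time.time() + timeout_ms / 1000.0

    @property
    def expired(self):
        return time.time() >= self._deadline

    @property
    def timeout_ms(self):
        return max(0, int((self._deadline - time.time()) * 1000))

def poll_until(fetch_once, timeout_ms):
    timer = MsTimer(timeout_ms)
    while True:
        records = fetch_once(timer.timeout_ms)   # one bounded fetch attempt
        if records:
            return records
        if timer.expired:
            return {}

# Simulated fetcher: empty twice, then returns data.
attempts = iter([{}, {}, {'tp0': ['msg1', 'msg2']}])
print(poll_until(lambda remaining_ms: next(attempts), timeout_ms=500))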
|
||||
|
||||
def position(self, partition, timeout_ms=None):
|
||||
def position(self, partition):
|
||||
"""Get the offset of the next record that will be fetched
|
||||
|
||||
Arguments:
|
||||
partition (TopicPartition): Partition to check
|
||||
|
||||
Returns:
|
||||
int: Offset or None
|
||||
int: Offset
|
||||
"""
|
||||
if not isinstance(partition, TopicPartition):
|
||||
raise TypeError('partition must be a TopicPartition namedtuple')
|
||||
assert self._subscription.is_assigned(partition), 'Partition is not assigned'
|
||||
|
||||
timer = Timer(timeout_ms)
|
||||
position = self._subscription.assignment[partition].position
|
||||
while position is None:
|
||||
# batch update fetch positions for any partitions without a valid position
|
||||
if self._update_fetch_positions(timeout_ms=timer.timeout_ms):
|
||||
position = self._subscription.assignment[partition].position
|
||||
elif timer.expired:
|
||||
return None
|
||||
else:
|
||||
return position.offset
|
||||
offset = self._subscription.assignment[partition].position
|
||||
if offset is None:
|
||||
self._update_fetch_positions([partition])
|
||||
offset = self._subscription.assignment[partition].position
|
||||
return offset
|
||||
|
||||
def highwater(self, partition):
|
||||
"""Last known highwater offset for a partition.
|
||||
@@ -854,7 +818,8 @@ class KafkaConsumer(six.Iterator):
|
||||
assert partition in self._subscription.assigned_partitions(), 'Unassigned partition'
|
||||
log.debug("Seeking to offset %s for partition %s", offset, partition)
|
||||
self._subscription.assignment[partition].seek(offset)
|
||||
self._iterator = None
|
||||
if not self.config['legacy_iterator']:
|
||||
self._iterator = None
|
||||
|
||||
def seek_to_beginning(self, *partitions):
|
||||
"""Seek to the oldest available offset for partitions.
|
||||
@@ -878,8 +843,9 @@ class KafkaConsumer(six.Iterator):
|
||||
|
||||
for tp in partitions:
|
||||
log.debug("Seeking to beginning of partition %s", tp)
|
||||
self._subscription.request_offset_reset(tp, OffsetResetStrategy.EARLIEST)
|
||||
self._iterator = None
|
||||
self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST)
|
||||
if not self.config['legacy_iterator']:
|
||||
self._iterator = None
|
||||
|
||||
def seek_to_end(self, *partitions):
|
||||
"""Seek to the most recent available offset for partitions.
|
||||
@@ -903,8 +869,9 @@ class KafkaConsumer(six.Iterator):
|
||||
|
||||
for tp in partitions:
|
||||
log.debug("Seeking to end of partition %s", tp)
|
||||
self._subscription.request_offset_reset(tp, OffsetResetStrategy.LATEST)
|
||||
self._iterator = None
|
||||
self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST)
|
||||
if not self.config['legacy_iterator']:
|
||||
self._iterator = None
|
||||
|
||||
def subscribe(self, topics=(), pattern=None, listener=None):
|
||||
"""Subscribe to a list of topics, or a topic regex pattern.
|
||||
@@ -975,16 +942,13 @@ class KafkaConsumer(six.Iterator):
|
||||
|
||||
def unsubscribe(self):
|
||||
"""Unsubscribe from all topics and clear all assigned partitions."""
|
||||
# make sure the offsets of topic partitions the consumer is unsubscribing from
|
||||
# are committed since there will be no following rebalance
|
||||
self._coordinator.maybe_auto_commit_offsets_now()
|
||||
self._subscription.unsubscribe()
|
||||
if self.config['api_version'] >= (0, 9):
|
||||
self._coordinator.maybe_leave_group()
|
||||
self._coordinator.close()
|
||||
self._client.cluster.need_all_topic_metadata = False
|
||||
self._client.set_topics([])
|
||||
log.debug("Unsubscribed all topics or patterns and assigned partitions")
|
||||
self._iterator = None
|
||||
if not self.config['legacy_iterator']:
|
||||
self._iterator = None
|
||||
|
||||
def metrics(self, raw=False):
|
||||
"""Get metrics on consumer performance.
|
||||
@@ -996,8 +960,6 @@ class KafkaConsumer(six.Iterator):
|
||||
This is an unstable interface. It may change in future
|
||||
releases without warning.
|
||||
"""
|
||||
if not self._metrics:
|
||||
return
|
||||
if raw:
|
||||
return self._metrics.metrics.copy()
|
||||
|
||||
@@ -1053,7 +1015,7 @@ class KafkaConsumer(six.Iterator):
|
||||
raise ValueError(
|
||||
"The target time for partition {} is {}. The target time "
|
||||
"cannot be negative.".format(tp, ts))
|
||||
return self._fetcher.offsets_by_times(
|
||||
return self._fetcher.get_offsets_by_times(
|
||||
timestamps, self.config['request_timeout_ms'])
|
||||
|
||||
def beginning_offsets(self, partitions):
|
||||
@@ -1119,7 +1081,7 @@ class KafkaConsumer(six.Iterator):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _update_fetch_positions(self, timeout_ms=None):
|
||||
def _update_fetch_positions(self, partitions):
|
||||
"""Set the fetch position to the committed position (if there is one)
|
||||
or reset it using the offset reset policy the user has configured.
|
||||
|
||||
@@ -1127,36 +1089,30 @@ class KafkaConsumer(six.Iterator):
|
||||
partitions (List[TopicPartition]): The partitions that need
|
||||
updating fetch positions.
|
||||
|
||||
Returns True if fetch positions updated, False if timeout or async reset is pending
|
||||
|
||||
Raises:
|
||||
NoOffsetForPartitionError: If no offset is stored for a given
|
||||
partition and no offset reset policy is defined.
|
||||
"""
|
||||
if self._subscription.has_all_fetch_positions():
|
||||
return True
|
||||
# Lookup any positions for partitions which are awaiting reset (which may be the
|
||||
# case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do
|
||||
# this check first to avoid an unnecessary lookup of committed offsets (which
|
||||
# typically occurs when the user is manually assigning partitions and managing
|
||||
# their own offsets).
|
||||
self._fetcher.reset_offsets_if_needed(partitions)
|
||||
|
||||
if (self.config['api_version'] >= (0, 8, 1) and
|
||||
self.config['group_id'] is not None):
|
||||
# If there are any partitions which do not have a valid position and are not
|
||||
# awaiting reset, then we need to fetch committed offsets. We will only do a
|
||||
# coordinator lookup if there are partitions which have missing positions, so
|
||||
# a consumer with manually assigned partitions can avoid a coordinator dependence
|
||||
# by always ensuring that assigned partitions have an initial position.
|
||||
if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms):
|
||||
return False
|
||||
if not self._subscription.has_all_fetch_positions():
|
||||
# if we still don't have offsets for all partitions, then we should either seek
|
||||
# to the last committed position or reset using the auto reset policy
|
||||
if (self.config['api_version'] >= (0, 8, 1) and
|
||||
self.config['group_id'] is not None):
|
||||
# first refresh commits for all assigned partitions
|
||||
self._coordinator.refresh_committed_offsets_if_needed()
|
||||
|
||||
# If there are partitions still needing a position and a reset policy is defined,
|
||||
# request reset using the default policy. If no reset strategy is defined and there
|
||||
# are partitions with a missing position, then we will raise an exception.
|
||||
self._subscription.reset_missing_positions()
|
||||
|
||||
# Finally send an asynchronous request to lookup and update the positions of any
|
||||
# partitions which are awaiting reset.
|
||||
return not self._fetcher.reset_offsets_if_needed()
|
||||
# Then, do any offset lookups in case some positions are not known
|
||||
self._fetcher.update_fetch_positions(partitions)
|
||||
|
||||
def _message_generator_v2(self):
|
||||
timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
|
||||
timeout_ms = 1000 * (self._consumer_timeout - time.time())
|
||||
record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False)
|
||||
for tp, records in six.iteritems(record_map):
|
||||
# Generators are stateful, and it is possible that the tp / records
|
||||
@@ -1171,15 +1127,72 @@ class KafkaConsumer(six.Iterator):
|
||||
log.debug("Not returning fetched records for partition %s"
|
||||
" since it is no longer fetchable", tp)
|
||||
break
|
||||
self._subscription.assignment[tp].position = OffsetAndMetadata(record.offset + 1, '', -1)
|
||||
self._subscription.assignment[tp].position = record.offset + 1
|
||||
yield record
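Editor's note: the generator above advances the stored position to record.offset + 1 as each record is handed to the caller, so a later fetch resumes after the last yielded message. A minimal sketch of that bookkeeping with plain dictionaries (integers instead of OffsetAndMetadata; names are illustrative):

import collections

Record = collections.namedtuple('Record', ['offset', 'value'])

def yield_and_advance(record_map, positions):
    """Yield records and move each partition's position past what was yielded."""
    for tp, records in record_map.items():
        for record in records:
            positions[tp] = record.offset + 1   # resume point for the next fetch
            yield record

positions = {'demo-0': 0}
batch = {'demo-0': [Record(0, 'a'), Record(1, 'b'), Record(2, 'c')]}
consumed = [r.value for r in yield_and_advance(batch, positions)]
print(consumed, positions)   # ['a', 'b', 'c'] {'demo-0': 3}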
|
||||
|
||||
def _message_generator(self):
|
||||
assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment'
|
||||
while time.time() < self._consumer_timeout:
|
||||
|
||||
self._coordinator.poll()
|
||||
|
||||
# Fetch offsets for any subscribed partitions that we arent tracking yet
|
||||
if not self._subscription.has_all_fetch_positions():
|
||||
partitions = self._subscription.missing_fetch_positions()
|
||||
self._update_fetch_positions(partitions)
|
||||
|
||||
poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms'])
|
||||
self._client.poll(timeout_ms=poll_ms)
|
||||
|
||||
# after the long poll, we should check whether the group needs to rebalance
|
||||
# prior to returning data so that the group can stabilize faster
|
||||
if self._coordinator.need_rejoin():
|
||||
continue
|
||||
|
||||
# We need to make sure we at least keep up with scheduled tasks,
|
||||
# like heartbeats, auto-commits, and metadata refreshes
|
||||
timeout_at = self._next_timeout()
|
||||
|
||||
# Short-circuit the fetch iterator if we are already timed out
|
||||
# to avoid any unintentional interaction with fetcher setup
|
||||
if time.time() > timeout_at:
|
||||
continue
|
||||
|
||||
for msg in self._fetcher:
|
||||
yield msg
|
||||
if time.time() > timeout_at:
|
||||
log.debug("internal iterator timeout - breaking for poll")
|
||||
break
|
||||
self._client.poll(timeout_ms=0)
|
||||
|
||||
# An else block on a for loop only executes if there was no break
|
||||
# so this should only be called on a StopIteration from the fetcher
|
||||
# We assume that it is safe to init_fetches when fetcher is done
|
||||
# i.e., there are no more records stored internally
|
||||
else:
|
||||
self._fetcher.send_fetches()
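# A minimal illustration of the for/else idiom the comment above relies on: the else
# branch runs only when the loop exhausts the iterator without hitting a break.
def drain(iterator, limit):
    for i, item in enumerate(iterator):
        if i >= limit:
            break
    else:
        print('iterator exhausted -- safe to send the next fetch')

drain(iter([1, 2, 3]), limit=10)   # prints: iterator exhausted -- safe to send the next fetch
drain(iter(range(100)), limit=10)  # breaks early, prints nothing
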
def _next_timeout(self):
|
||||
timeout = min(self._consumer_timeout,
|
||||
self._client.cluster.ttl() / 1000.0 + time.time(),
|
||||
self._coordinator.time_to_next_poll() + time.time())
|
||||
return timeout
|
||||
|
||||
def __iter__(self): # pylint: disable=non-iterator-returned
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
if self._closed:
|
||||
raise StopIteration('KafkaConsumer closed')
|
||||
# Now that the heartbeat thread runs in the background
|
||||
# there should be no reason to maintain a separate iterator
|
||||
# but we'll keep it available for a few releases just in case
|
||||
if self.config['legacy_iterator']:
|
||||
return self.next_v1()
|
||||
else:
|
||||
return self.next_v2()
|
||||
|
||||
def next_v2(self):
|
||||
self._set_consumer_timeout()
|
||||
while time.time() < self._consumer_timeout:
|
||||
if not self._iterator:
|
||||
@@ -1190,6 +1203,17 @@ class KafkaConsumer(six.Iterator):
|
||||
self._iterator = None
|
||||
raise StopIteration()
|
||||
|
||||
def next_v1(self):
|
||||
if not self._iterator:
|
||||
self._iterator = self._message_generator()
|
||||
|
||||
self._set_consumer_timeout()
|
||||
try:
|
||||
return next(self._iterator)
|
||||
except StopIteration:
|
||||
self._iterator = None
|
||||
raise
|
||||
|
||||
def _set_consumer_timeout(self):
# consumer_timeout_ms can be used to stop iteration early
if self.config['consumer_timeout_ms'] >= 0:

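# A usage sketch of the option documented above (broker address and topic name are
# hypothetical): with consumer_timeout_ms set, plain iteration stops once no record
# arrives within that window instead of blocking forever.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'example-topic',
    bootstrap_servers='localhost:9092',
    auto_offset_reset='earliest',
    consumer_timeout_ms=5000,      # stop iterating after 5 seconds without new records
)
for record in consumer:            # the iterator raises StopIteration when the timeout hits
    print(record.offset, record.value)
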
@@ -1,40 +1,18 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
from collections import OrderedDict
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except ImportError:
|
||||
from collections import Sequence
|
||||
try:
|
||||
# enum in stdlib as of py3.4
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.protocol.list_offsets import OffsetResetStrategy
|
||||
from kafka.errors import IllegalStateError
|
||||
from kafka.protocol.offset import OffsetResetStrategy
|
||||
from kafka.structs import OffsetAndMetadata
|
||||
from kafka.util import ensure_valid_topic_name, synchronized
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubscriptionType(IntEnum):
|
||||
NONE = 0
|
||||
AUTO_TOPICS = 1
|
||||
AUTO_PATTERN = 2
|
||||
USER_ASSIGNED = 3
|
||||
|
||||
|
||||
class SubscriptionState(object):
|
||||
"""
|
||||
A class for tracking the topics, partitions, and offsets for the consumer.
|
||||
@@ -54,6 +32,10 @@ class SubscriptionState(object):
|
||||
Note that pause state as well as fetch/consumed positions are not preserved
|
||||
when partition assignment is changed whether directly by the user or
|
||||
through a group rebalance.
|
||||
|
||||
This class also maintains a cache of the latest commit position for each of
|
||||
the assigned partitions. This is updated through committed() and can be used
|
||||
to set the initial fetch position (e.g. Fetcher._reset_offset() ).
|
||||
"""
|
||||
_SUBSCRIPTION_EXCEPTION_MESSAGE = (
|
||||
"You must choose only one way to configure your consumer:"
|
||||
@@ -61,6 +43,10 @@ class SubscriptionState(object):
|
||||
" (2) subscribe to topics matching a regex pattern,"
|
||||
" (3) assign itself specific topic-partitions.")
|
||||
|
||||
# Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29
|
||||
_MAX_NAME_LENGTH = 249
|
||||
_TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$')
|
||||
|
||||
def __init__(self, offset_reset_strategy='earliest'):
|
||||
"""Initialize a SubscriptionState instance
|
||||
|
||||
@@ -78,24 +64,15 @@ class SubscriptionState(object):
|
||||
self._default_offset_reset_strategy = offset_reset_strategy
|
||||
|
||||
self.subscription = None # set() or None
|
||||
self.subscription_type = SubscriptionType.NONE
|
||||
self.subscribed_pattern = None # regex str or None
|
||||
self._group_subscription = set()
|
||||
self._user_assignment = set()
|
||||
self.assignment = OrderedDict()
|
||||
self.rebalance_listener = None
|
||||
self.listeners = []
|
||||
self._lock = threading.RLock()
|
||||
self.assignment = dict()
|
||||
self.listener = None
|
||||
|
||||
def _set_subscription_type(self, subscription_type):
|
||||
if not isinstance(subscription_type, SubscriptionType):
|
||||
raise ValueError('SubscriptionType enum required')
|
||||
if self.subscription_type == SubscriptionType.NONE:
|
||||
self.subscription_type = subscription_type
|
||||
elif self.subscription_type != subscription_type:
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
# initialize to true for the consumers to fetch offset upon starting up
|
||||
self.needs_fetch_committed_offsets = True
|
||||
|
||||
@synchronized
|
||||
def subscribe(self, topics=(), pattern=None, listener=None):
|
||||
"""Subscribe to a list of topics, or a topic regex pattern.
|
||||
|
||||
@@ -131,26 +108,39 @@ class SubscriptionState(object):
|
||||
guaranteed, however, that the partitions revoked/assigned
|
||||
through this interface are from topics subscribed in this call.
|
||||
"""
|
||||
if self._user_assignment or (topics and pattern):
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
assert topics or pattern, 'Must provide topics or pattern'
|
||||
if (topics and pattern):
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
|
||||
elif pattern:
|
||||
self._set_subscription_type(SubscriptionType.AUTO_PATTERN)
|
||||
if pattern:
|
||||
log.info('Subscribing to pattern: /%s/', pattern)
|
||||
self.subscription = set()
|
||||
self.subscribed_pattern = re.compile(pattern)
|
||||
else:
|
||||
if isinstance(topics, str) or not isinstance(topics, Sequence):
|
||||
raise TypeError('Topics must be a list (or non-str sequence)')
|
||||
self._set_subscription_type(SubscriptionType.AUTO_TOPICS)
|
||||
self.change_subscription(topics)
|
||||
|
||||
if listener and not isinstance(listener, ConsumerRebalanceListener):
|
||||
raise TypeError('listener must be a ConsumerRebalanceListener')
|
||||
self.rebalance_listener = listener
|
||||
self.listener = listener
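# A consumer-level sketch of the subscription modes handled above (topic names and
# group id are hypothetical): exactly one of a topic list or a regex pattern may be
# given, optionally together with a rebalance listener.
from kafka import KafkaConsumer

consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='example-group')
consumer.subscribe(topics=['orders', 'payments'])   # subscribe to an explicit topic list
# consumer.subscribe(pattern='^metrics-.*')         # or to a regex pattern -- never both
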
def _ensure_valid_topic_name(self, topic):
|
||||
""" Ensures that the topic name is valid according to the kafka source. """
|
||||
|
||||
# See Kafka Source:
|
||||
# https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
|
||||
if topic is None:
|
||||
raise TypeError('All topics must not be None')
|
||||
if not isinstance(topic, six.string_types):
|
||||
raise TypeError('All topics must be strings')
|
||||
if len(topic) == 0:
|
||||
raise ValueError('All topics must be non-empty strings')
|
||||
if topic == '.' or topic == '..':
|
||||
raise ValueError('Topic name cannot be "." or ".."')
|
||||
if len(topic) > self._MAX_NAME_LENGTH:
|
||||
raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(self._MAX_NAME_LENGTH, topic))
|
||||
if not self._TOPIC_LEGAL_CHARS.match(topic):
|
||||
raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic))
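# A standalone sketch of the validation rules enforced above, using the same constants:
# a legal name is a non-empty string of ASCII alphanumerics plus '.', '_' and '-', at
# most 249 characters long, and never '.' or '..'.
import re

MAX_NAME_LENGTH = 249
TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$')

def is_valid_topic_name(topic):
    return (isinstance(topic, str)
            and topic not in ('.', '..')
            and 0 < len(topic) <= MAX_NAME_LENGTH
            and TOPIC_LEGAL_CHARS.match(topic) is not None)

assert is_valid_topic_name('my-topic.v1')
assert not is_valid_topic_name('bad topic!')
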
@synchronized
|
||||
def change_subscription(self, topics):
|
||||
"""Change the topic subscription.
|
||||
|
||||
@@ -164,8 +154,8 @@ class SubscriptionState(object):
|
||||
- a topic name is '.' or '..' or
|
||||
- a topic name does not consist of ASCII-characters/'-'/'_'/'.'
|
||||
"""
|
||||
if not self.partitions_auto_assigned():
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
if self._user_assignment:
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
|
||||
if isinstance(topics, six.string_types):
|
||||
topics = [topics]
|
||||
@@ -176,13 +166,17 @@ class SubscriptionState(object):
|
||||
return
|
||||
|
||||
for t in topics:
|
||||
ensure_valid_topic_name(t)
|
||||
self._ensure_valid_topic_name(t)
|
||||
|
||||
log.info('Updating subscribed topics to: %s', topics)
|
||||
self.subscription = set(topics)
|
||||
self._group_subscription.update(topics)
|
||||
|
||||
@synchronized
|
||||
# Remove any assigned partitions which are no longer subscribed to
|
||||
for tp in set(self.assignment.keys()):
|
||||
if tp.topic not in self.subscription:
|
||||
del self.assignment[tp]
|
||||
|
||||
def group_subscribe(self, topics):
|
||||
"""Add topics to the current group subscription.
|
||||
|
||||
@@ -192,19 +186,17 @@ class SubscriptionState(object):
|
||||
Arguments:
|
||||
topics (list of str): topics to add to the group subscription
|
||||
"""
|
||||
if not self.partitions_auto_assigned():
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
if self._user_assignment:
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
self._group_subscription.update(topics)
|
||||
|
||||
@synchronized
|
||||
def reset_group_subscription(self):
|
||||
"""Reset the group's subscription to only contain topics subscribed by this consumer."""
|
||||
if not self.partitions_auto_assigned():
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
if self._user_assignment:
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
assert self.subscription is not None, 'Subscription required'
|
||||
self._group_subscription.intersection_update(self.subscription)
|
||||
|
||||
@synchronized
|
||||
def assign_from_user(self, partitions):
|
||||
"""Manually assign a list of TopicPartitions to this consumer.
|
||||
|
||||
@@ -223,13 +215,21 @@ class SubscriptionState(object):
|
||||
Raises:
|
||||
IllegalStateError: if consumer has already called subscribe()
|
||||
"""
|
||||
self._set_subscription_type(SubscriptionType.USER_ASSIGNED)
|
||||
if self.subscription is not None:
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
|
||||
if self._user_assignment != set(partitions):
|
||||
self._user_assignment = set(partitions)
|
||||
self._set_assignment({partition: self.assignment.get(partition, TopicPartitionState())
|
||||
for partition in partitions})
|
||||
|
||||
@synchronized
|
||||
for partition in partitions:
|
||||
if partition not in self.assignment:
|
||||
self._add_assigned_partition(partition)
|
||||
|
||||
for tp in set(self.assignment.keys()) - self._user_assignment:
|
||||
del self.assignment[tp]
|
||||
|
||||
self.needs_fetch_committed_offsets = True
|
||||
|
||||
def assign_from_subscribed(self, assignments):
|
||||
"""Update the assignment to the specified partitions
|
||||
|
||||
@@ -243,39 +243,26 @@ class SubscriptionState(object):
|
||||
consumer instance.
|
||||
"""
|
||||
if not self.partitions_auto_assigned():
|
||||
raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
|
||||
|
||||
for tp in assignments:
|
||||
if tp.topic not in self.subscription:
|
||||
raise ValueError("Assigned partition %s for non-subscribed topic." % (tp,))
|
||||
|
||||
# randomized ordering should improve balance for short-lived consumers
|
||||
self._set_assignment({partition: TopicPartitionState() for partition in assignments}, randomize=True)
|
||||
# after rebalancing, we always reinitialize the assignment state
|
||||
self.assignment.clear()
|
||||
for tp in assignments:
|
||||
self._add_assigned_partition(tp)
|
||||
self.needs_fetch_committed_offsets = True
|
||||
log.info("Updated partition assignment: %s", assignments)
|
||||
|
||||
def _set_assignment(self, partition_states, randomize=False):
|
||||
"""Batch partition assignment by topic (self.assignment is OrderedDict)"""
|
||||
self.assignment.clear()
|
||||
topics = [tp.topic for tp in six.iterkeys(partition_states)]
|
||||
if randomize:
|
||||
random.shuffle(topics)
|
||||
topic_partitions = OrderedDict({topic: [] for topic in topics})
|
||||
for tp in six.iterkeys(partition_states):
|
||||
topic_partitions[tp.topic].append(tp)
|
||||
for topic in six.iterkeys(topic_partitions):
|
||||
for tp in topic_partitions[topic]:
|
||||
self.assignment[tp] = partition_states[tp]
|
||||
|
||||
@synchronized
|
||||
def unsubscribe(self):
|
||||
"""Clear all topic subscriptions and partition assignments"""
|
||||
self.subscription = None
|
||||
self._user_assignment.clear()
|
||||
self.assignment.clear()
|
||||
self.subscribed_pattern = None
|
||||
self.subscription_type = SubscriptionType.NONE
|
||||
|
||||
@synchronized
|
||||
def group_subscription(self):
|
||||
"""Get the topic subscription for the group.
|
||||
|
||||
@@ -291,7 +278,6 @@ class SubscriptionState(object):
|
||||
"""
|
||||
return self._group_subscription
|
||||
|
||||
@synchronized
|
||||
def seek(self, partition, offset):
|
||||
"""Manually specify the fetch offset for a TopicPartition.
|
||||
|
||||
@@ -303,48 +289,40 @@ class SubscriptionState(object):
|
||||
|
||||
Arguments:
|
||||
partition (TopicPartition): partition for seek operation
|
||||
offset (int or OffsetAndMetadata): message offset in partition
|
||||
offset (int): message offset in partition
|
||||
"""
|
||||
if not isinstance(offset, (int, OffsetAndMetadata)):
|
||||
raise TypeError("offset must be type in or OffsetAndMetadata")
|
||||
self.assignment[partition].seek(offset)
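# A consumer-level usage sketch for seek() (broker, topic and offset are hypothetical):
# the partition must already be assigned, and the next fetch starts from the new position.
from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id=None)
tp = TopicPartition('example-topic', 0)
consumer.assign([tp])
consumer.seek(tp, 42)            # next fetch begins at offset 42
consumer.seek_to_beginning(tp)   # or jump to the earliest available offset
consumer.seek_to_end(tp)         # or to the latest
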
@synchronized
|
||||
def assigned_partitions(self):
|
||||
"""Return set of TopicPartitions in current assignment."""
|
||||
return set(self.assignment.keys())
|
||||
|
||||
@synchronized
|
||||
def paused_partitions(self):
|
||||
"""Return current set of paused TopicPartitions."""
|
||||
return set(partition for partition in self.assignment
|
||||
if self.is_paused(partition))
|
||||
|
||||
@synchronized
|
||||
def fetchable_partitions(self):
|
||||
"""Return ordered list of TopicPartitions that should be Fetched."""
|
||||
fetchable = list()
|
||||
"""Return set of TopicPartitions that should be Fetched."""
|
||||
fetchable = set()
|
||||
for partition, state in six.iteritems(self.assignment):
|
||||
if state.is_fetchable():
|
||||
fetchable.append(partition)
|
||||
fetchable.add(partition)
|
||||
return fetchable
|
||||
|
||||
@synchronized
|
||||
def partitions_auto_assigned(self):
|
||||
"""Return True unless user supplied partitions manually."""
|
||||
return self.subscription_type in (SubscriptionType.AUTO_TOPICS, SubscriptionType.AUTO_PATTERN)
|
||||
return self.subscription is not None
|
||||
|
||||
@synchronized
|
||||
def all_consumed_offsets(self):
|
||||
"""Returns consumed offsets as {TopicPartition: OffsetAndMetadata}"""
|
||||
all_consumed = {}
|
||||
for partition, state in six.iteritems(self.assignment):
|
||||
if state.has_valid_position:
|
||||
all_consumed[partition] = state.position
|
||||
all_consumed[partition] = OffsetAndMetadata(state.position, '')
|
||||
return all_consumed
|
||||
|
||||
@synchronized
|
||||
def request_offset_reset(self, partition, offset_reset_strategy=None):
|
||||
def need_offset_reset(self, partition, offset_reset_strategy=None):
|
||||
"""Mark partition for offset reset using specified or default strategy.
|
||||
|
||||
Arguments:
|
||||
@@ -353,113 +331,63 @@ class SubscriptionState(object):
|
||||
"""
|
||||
if offset_reset_strategy is None:
|
||||
offset_reset_strategy = self._default_offset_reset_strategy
|
||||
self.assignment[partition].reset(offset_reset_strategy)
|
||||
self.assignment[partition].await_reset(offset_reset_strategy)
|
||||
|
||||
@synchronized
|
||||
def set_reset_pending(self, partitions, next_allowed_reset_time):
|
||||
for partition in partitions:
|
||||
self.assignment[partition].set_reset_pending(next_allowed_reset_time)
|
||||
|
||||
@synchronized
|
||||
def has_default_offset_reset_policy(self):
|
||||
"""Return True if default offset reset policy is Earliest or Latest"""
|
||||
return self._default_offset_reset_strategy != OffsetResetStrategy.NONE
|
||||
|
||||
@synchronized
|
||||
def is_offset_reset_needed(self, partition):
|
||||
return self.assignment[partition].awaiting_reset
|
||||
|
||||
@synchronized
|
||||
def has_all_fetch_positions(self):
|
||||
for state in six.itervalues(self.assignment):
|
||||
for state in self.assignment.values():
|
||||
if not state.has_valid_position:
|
||||
return False
|
||||
return True
|
||||
|
||||
@synchronized
|
||||
def missing_fetch_positions(self):
|
||||
missing = set()
|
||||
for partition, state in six.iteritems(self.assignment):
|
||||
if state.is_missing_position():
|
||||
if not state.has_valid_position:
|
||||
missing.add(partition)
|
||||
return missing
|
||||
|
||||
@synchronized
|
||||
def has_valid_position(self, partition):
|
||||
return partition in self.assignment and self.assignment[partition].has_valid_position
|
||||
|
||||
@synchronized
|
||||
def reset_missing_positions(self):
|
||||
partitions_with_no_offsets = set()
|
||||
for tp, state in six.iteritems(self.assignment):
|
||||
if state.is_missing_position():
|
||||
if self._default_offset_reset_strategy == OffsetResetStrategy.NONE:
|
||||
partitions_with_no_offsets.add(tp)
|
||||
else:
|
||||
state.reset(self._default_offset_reset_strategy)
|
||||
|
||||
if partitions_with_no_offsets:
|
||||
raise Errors.NoOffsetForPartitionError(partitions_with_no_offsets)
|
||||
|
||||
@synchronized
|
||||
def partitions_needing_reset(self):
|
||||
partitions = set()
|
||||
for tp, state in six.iteritems(self.assignment):
|
||||
if state.awaiting_reset and state.is_reset_allowed():
|
||||
partitions.add(tp)
|
||||
return partitions
|
||||
|
||||
@synchronized
|
||||
def is_assigned(self, partition):
|
||||
return partition in self.assignment
|
||||
|
||||
@synchronized
|
||||
def is_paused(self, partition):
|
||||
return partition in self.assignment and self.assignment[partition].paused
|
||||
|
||||
@synchronized
|
||||
def is_fetchable(self, partition):
|
||||
return partition in self.assignment and self.assignment[partition].is_fetchable()
|
||||
|
||||
@synchronized
|
||||
def pause(self, partition):
|
||||
self.assignment[partition].pause()
|
||||
|
||||
@synchronized
|
||||
def resume(self, partition):
|
||||
self.assignment[partition].resume()
|
||||
|
||||
@synchronized
|
||||
def reset_failed(self, partitions, next_retry_time):
|
||||
for partition in partitions:
|
||||
self.assignment[partition].reset_failed(next_retry_time)
|
||||
|
||||
@synchronized
|
||||
def move_partition_to_end(self, partition):
|
||||
if partition in self.assignment:
|
||||
try:
|
||||
self.assignment.move_to_end(partition)
|
||||
except AttributeError:
|
||||
state = self.assignment.pop(partition)
|
||||
self.assignment[partition] = state
|
||||
|
||||
@synchronized
|
||||
def position(self, partition):
|
||||
return self.assignment[partition].position
|
||||
def _add_assigned_partition(self, partition):
|
||||
self.assignment[partition] = TopicPartitionState()
|
||||
|
||||
|
||||
class TopicPartitionState(object):
|
||||
def __init__(self):
|
||||
self.committed = None # last committed OffsetAndMetadata
|
||||
self.has_valid_position = False # whether we have valid position
|
||||
self.paused = False # whether this partition has been paused by the user
|
||||
self.reset_strategy = None # the reset strategy if awaiting_reset is set
|
||||
self._position = None # OffsetAndMetadata exposed to the user
|
||||
self.awaiting_reset = False # whether we are awaiting reset
|
||||
self.reset_strategy = None # the reset strategy if awaitingReset is set
|
||||
self._position = None # offset exposed to the user
|
||||
self.highwater = None
|
||||
self.drop_pending_record_batch = False
|
||||
self.next_allowed_retry_time = None
|
||||
self.drop_pending_message_set = False
|
||||
# The last message offset hint available from a message batch with
|
||||
# magic=2 which includes deleted compacted messages
|
||||
self.last_offset_from_message_batch = None
|
||||
|
||||
def _set_position(self, offset):
|
||||
assert self.has_valid_position, 'Valid position required'
|
||||
assert isinstance(offset, OffsetAndMetadata)
|
||||
self._position = offset
|
||||
|
||||
def _get_position(self):
|
||||
@@ -467,37 +395,20 @@ class TopicPartitionState(object):
|
||||
|
||||
position = property(_get_position, _set_position, None, "last position")
|
||||
|
||||
def reset(self, strategy):
|
||||
assert strategy is not None
|
||||
def await_reset(self, strategy):
|
||||
self.awaiting_reset = True
|
||||
self.reset_strategy = strategy
|
||||
self._position = None
|
||||
self.next_allowed_retry_time = None
|
||||
|
||||
def is_reset_allowed(self):
|
||||
return self.next_allowed_retry_time is None or self.next_allowed_retry_time < time.time()
|
||||
|
||||
@property
|
||||
def awaiting_reset(self):
|
||||
return self.reset_strategy is not None
|
||||
|
||||
def set_reset_pending(self, next_allowed_retry_time):
|
||||
self.next_allowed_retry_time = next_allowed_retry_time
|
||||
|
||||
def reset_failed(self, next_allowed_retry_time):
|
||||
self.next_allowed_retry_time = next_allowed_retry_time
|
||||
|
||||
@property
|
||||
def has_valid_position(self):
|
||||
return self._position is not None
|
||||
|
||||
def is_missing_position(self):
|
||||
return not self.has_valid_position and not self.awaiting_reset
|
||||
self.last_offset_from_message_batch = None
|
||||
self.has_valid_position = False
|
||||
|
||||
def seek(self, offset):
|
||||
self._position = offset if isinstance(offset, OffsetAndMetadata) else OffsetAndMetadata(offset, '', -1)
|
||||
self._position = offset
|
||||
self.awaiting_reset = False
|
||||
self.reset_strategy = None
|
||||
self.drop_pending_record_batch = True
|
||||
self.next_allowed_retry_time = None
|
||||
self.has_valid_position = True
|
||||
self.drop_pending_message_set = True
|
||||
self.last_offset_from_message_batch = None
|
||||
|
||||
def pause(self):
|
||||
self.paused = True
|
||||
@@ -509,7 +420,6 @@ class TopicPartitionState(object):
|
||||
return not self.paused and self.has_valid_position
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class ConsumerRebalanceListener(object):
|
||||
"""
|
||||
A callback interface that the user can implement to trigger custom actions
|
||||
@@ -551,6 +461,8 @@ class ConsumerRebalanceListener(object):
taking over that partition has its on_partitions_assigned() callback
called to load the state.
"""
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def on_partitions_revoked(self, revoked):
|
||||
"""
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
from collections import defaultdict, namedtuple
|
||||
from copy import deepcopy
|
||||
|
||||
from kafka.cluster import ClusterMetadata
|
||||
from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor
|
||||
from kafka.coordinator.assignors.sticky.partition_movements import PartitionMovements
|
||||
from kafka.coordinator.assignors.sticky.sorted_set import SortedSet
|
||||
@@ -647,19 +648,15 @@ class StickyPartitionAssignor(AbstractPartitionAssignor):
|
||||
|
||||
@classmethod
|
||||
def metadata(cls, topics):
|
||||
return cls._metadata(topics, cls.member_assignment, cls.generation)
|
||||
|
||||
@classmethod
|
||||
def _metadata(cls, topics, member_assignment_partitions, generation=-1):
|
||||
if member_assignment_partitions is None:
|
||||
if cls.member_assignment is None:
|
||||
log.debug("No member assignment available")
|
||||
user_data = b''
|
||||
else:
|
||||
log.debug("Member assignment is available, generating the metadata: generation {}".format(cls.generation))
|
||||
partitions_by_topic = defaultdict(list)
|
||||
for topic_partition in member_assignment_partitions:
|
||||
for topic_partition in cls.member_assignment: # pylint: disable=not-an-iterable
|
||||
partitions_by_topic[topic_partition.topic].append(topic_partition.partition)
|
||||
data = StickyAssignorUserDataV1(list(partitions_by_topic.items()), generation)
|
||||
data = StickyAssignorUserDataV1(six.iteritems(partitions_by_topic), cls.generation)
|
||||
user_data = data.encode()
|
||||
return ConsumerProtocolMemberMetadata(cls.version, list(topics), user_data)
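# A standalone sketch of the grouping step above (the partition list is hypothetical):
# the member's partitions are bucketed by topic before being encoded into the user data.
from collections import defaultdict

from kafka.structs import TopicPartition

member_assignment = [TopicPartition('orders', 0), TopicPartition('orders', 2),
                     TopicPartition('payments', 1)]
partitions_by_topic = defaultdict(list)
for tp in member_assignment:
    partitions_by_topic[tp.topic].append(tp.partition)
print(dict(partitions_by_topic))   # {'orders': [0, 2], 'payments': [1]}
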
File diff suppressed because it is too large
@@ -19,7 +19,7 @@ from kafka.metrics import AnonMeasurable
|
||||
from kafka.metrics.stats import Avg, Count, Max, Rate
|
||||
from kafka.protocol.commit import OffsetCommitRequest, OffsetFetchRequest
|
||||
from kafka.structs import OffsetAndMetadata, TopicPartition
|
||||
from kafka.util import Timer, WeakMethod
|
||||
from kafka.util import WeakMethod
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -39,11 +39,10 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
'retry_backoff_ms': 100,
|
||||
'api_version': (0, 10, 1),
|
||||
'exclude_internal_topics': True,
|
||||
'metrics': None,
|
||||
'metric_group_prefix': 'consumer'
|
||||
}
|
||||
|
||||
def __init__(self, client, subscription, **configs):
|
||||
def __init__(self, client, subscription, metrics, **configs):
|
||||
"""Initialize the coordination manager.
|
||||
|
||||
Keyword Arguments:
|
||||
@@ -55,7 +54,7 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
auto_commit_interval_ms (int): milliseconds between automatic
|
||||
offset commits, if enable_auto_commit is True. Default: 5000.
|
||||
default_offset_commit_callback (callable): called as
|
||||
callback(offsets, response) response will be either an Exception
|
||||
callback(offsets, exception) response will be either an Exception
|
||||
or None. This callback can be used to trigger custom actions when
|
||||
a commit request completes.
|
||||
assignors (list): List of objects to use to distribute partition
|
||||
@@ -79,7 +78,7 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
True the only way to receive records from an internal topic is
|
||||
subscribing to it. Requires 0.10+. Default: True
|
||||
"""
|
||||
super(ConsumerCoordinator, self).__init__(client, **configs)
|
||||
super(ConsumerCoordinator, self).__init__(client, metrics, **configs)
|
||||
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
for key in self.config:
|
||||
@@ -95,7 +94,6 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
self.auto_commit_interval = self.config['auto_commit_interval_ms'] / 1000
|
||||
self.next_auto_commit_deadline = None
|
||||
self.completed_offset_commits = collections.deque()
|
||||
self._offset_fetch_futures = dict()
|
||||
|
||||
if self.config['default_offset_commit_callback'] is None:
|
||||
self.config['default_offset_commit_callback'] = self._default_offset_commit_callback
|
||||
@@ -122,21 +120,15 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
else:
|
||||
self.next_auto_commit_deadline = time.time() + self.auto_commit_interval
|
||||
|
||||
if self.config['metrics']:
|
||||
self._consumer_sensors = ConsumerCoordinatorMetrics(
|
||||
self.config['metrics'], self.config['metric_group_prefix'], self._subscription)
|
||||
else:
|
||||
self._consumer_sensors = None
|
||||
self.consumer_sensors = ConsumerCoordinatorMetrics(
|
||||
metrics, self.config['metric_group_prefix'], self._subscription)
|
||||
|
||||
self._cluster.request_update()
|
||||
self._cluster.add_listener(WeakMethod(self._handle_metadata_update))
|
||||
|
||||
def __del__(self):
|
||||
if hasattr(self, '_cluster') and self._cluster:
|
||||
try:
|
||||
self._cluster.remove_listener(WeakMethod(self._handle_metadata_update))
|
||||
except TypeError:
|
||||
pass
|
||||
self._cluster.remove_listener(WeakMethod(self._handle_metadata_update))
|
||||
super(ConsumerCoordinator, self).__del__()
|
||||
|
||||
def protocol_type(self):
|
||||
@@ -208,8 +200,8 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
def _build_metadata_snapshot(self, subscription, cluster):
|
||||
metadata_snapshot = {}
|
||||
for topic in subscription.group_subscription():
|
||||
partitions = cluster.partitions_for_topic(topic)
|
||||
metadata_snapshot[topic] = partitions or set()
|
||||
partitions = cluster.partitions_for_topic(topic) or []
|
||||
metadata_snapshot[topic] = set(partitions)
|
||||
return metadata_snapshot
|
||||
|
||||
def _lookup_assignor(self, name):
|
||||
@@ -230,6 +222,10 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
|
||||
assignment = ConsumerProtocol.ASSIGNMENT.decode(member_assignment_bytes)
|
||||
|
||||
# set the flag to refresh last committed offsets
|
||||
self._subscription.needs_fetch_committed_offsets = True
|
||||
|
||||
# update partition assignment
|
||||
try:
|
||||
self._subscription.assign_from_subscribed(assignment.partitions())
|
||||
except ValueError as e:
|
||||
@@ -250,16 +246,16 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
assigned, self.group_id)
|
||||
|
||||
# execute the user's callback after rebalance
|
||||
if self._subscription.rebalance_listener:
|
||||
if self._subscription.listener:
|
||||
try:
|
||||
self._subscription.rebalance_listener.on_partitions_assigned(assigned)
|
||||
self._subscription.listener.on_partitions_assigned(assigned)
|
||||
except Exception:
|
||||
log.exception("User provided rebalance listener %s for group %s"
|
||||
log.exception("User provided listener %s for group %s"
|
||||
" failed on partition assignment: %s",
|
||||
self._subscription.rebalance_listener, self.group_id,
|
||||
self._subscription.listener, self.group_id,
|
||||
assigned)
|
||||
|
||||
def poll(self, timeout_ms=None):
|
||||
def poll(self):
|
||||
"""
|
||||
Poll for coordinator events. Only applicable if group_id is set, and
|
||||
broker version supports GroupCoordinators. This ensures that the
|
||||
@@ -268,46 +264,33 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
periodic offset commits if they are enabled.
|
||||
"""
|
||||
if self.group_id is None:
|
||||
return True
|
||||
return
|
||||
|
||||
timer = Timer(timeout_ms)
|
||||
try:
|
||||
self._invoke_completed_offset_commit_callbacks()
|
||||
if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
|
||||
log.debug('coordinator.poll: timeout in ensure_coordinator_ready; returning early')
|
||||
return False
|
||||
self._invoke_completed_offset_commit_callbacks()
|
||||
self.ensure_coordinator_ready()
|
||||
|
||||
if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned():
|
||||
if self.need_rejoin():
|
||||
# due to a race condition between the initial metadata fetch and the
|
||||
# initial rebalance, we need to ensure that the metadata is fresh
|
||||
# before joining initially, and then request the metadata update. If
|
||||
# metadata update arrives while the rebalance is still pending (for
|
||||
# example, when the join group is still inflight), then we will lose
|
||||
# track of the fact that we need to rebalance again to reflect the
|
||||
# change to the topic subscription. Without ensuring that the
|
||||
# metadata is fresh, any metadata update that changes the topic
|
||||
# subscriptions and arrives while a rebalance is in progress will
|
||||
# essentially be ignored. See KAFKA-3949 for the complete
|
||||
# description of the problem.
|
||||
if self._subscription.subscribed_pattern:
|
||||
metadata_update = self._client.cluster.request_update()
|
||||
self._client.poll(future=metadata_update, timeout_ms=timer.timeout_ms)
|
||||
if not metadata_update.is_done:
|
||||
log.debug('coordinator.poll: timeout updating metadata; returning early')
|
||||
return False
|
||||
if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned():
|
||||
if self.need_rejoin():
|
||||
# due to a race condition between the initial metadata fetch and the
|
||||
# initial rebalance, we need to ensure that the metadata is fresh
|
||||
# before joining initially, and then request the metadata update. If
|
||||
# metadata update arrives while the rebalance is still pending (for
|
||||
# example, when the join group is still inflight), then we will lose
|
||||
# track of the fact that we need to rebalance again to reflect the
|
||||
# change to the topic subscription. Without ensuring that the
|
||||
# metadata is fresh, any metadata update that changes the topic
|
||||
# subscriptions and arrives while a rebalance is in progress will
|
||||
# essentially be ignored. See KAFKA-3949 for the complete
|
||||
# description of the problem.
|
||||
if self._subscription.subscribed_pattern:
|
||||
metadata_update = self._client.cluster.request_update()
|
||||
self._client.poll(future=metadata_update)
|
||||
|
||||
if not self.ensure_active_group(timeout_ms=timer.timeout_ms):
|
||||
log.debug('coordinator.poll: timeout in ensure_active_group; returning early')
|
||||
return False
|
||||
self.ensure_active_group()
|
||||
|
||||
self.poll_heartbeat()
|
||||
self.poll_heartbeat()
|
||||
|
||||
self._maybe_auto_commit_offsets_async()
|
||||
return True
|
||||
|
||||
except Errors.KafkaTimeoutError:
|
||||
return False
|
||||
self._maybe_auto_commit_offsets_async()
|
||||
|
||||
def time_to_next_poll(self):
|
||||
"""Return seconds (float) remaining until :meth:`.poll` should be called again"""
|
||||
@@ -357,21 +340,21 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
group_assignment[member_id] = assignment
|
||||
return group_assignment
|
||||
|
||||
def _on_join_prepare(self, generation, member_id, timeout_ms=None):
|
||||
def _on_join_prepare(self, generation, member_id):
|
||||
# commit offsets prior to rebalance if auto-commit enabled
|
||||
self._maybe_auto_commit_offsets_sync(timeout_ms=timeout_ms)
|
||||
self._maybe_auto_commit_offsets_sync()
|
||||
|
||||
# execute the user's callback before rebalance
|
||||
log.info("Revoking previously assigned partitions %s for group %s",
|
||||
self._subscription.assigned_partitions(), self.group_id)
|
||||
if self._subscription.rebalance_listener:
|
||||
if self._subscription.listener:
|
||||
try:
|
||||
revoked = set(self._subscription.assigned_partitions())
|
||||
self._subscription.rebalance_listener.on_partitions_revoked(revoked)
|
||||
self._subscription.listener.on_partitions_revoked(revoked)
|
||||
except Exception:
|
||||
log.exception("User provided subscription rebalance listener %s"
|
||||
log.exception("User provided subscription listener %s"
|
||||
" for group %s failed on_partitions_revoked",
|
||||
self._subscription.rebalance_listener, self.group_id)
|
||||
self._subscription.listener, self.group_id)
|
||||
|
||||
self._is_leader = False
|
||||
self._subscription.reset_group_subscription()
|
||||
@@ -400,19 +383,17 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
|
||||
return super(ConsumerCoordinator, self).need_rejoin()
|
||||
|
||||
def refresh_committed_offsets_if_needed(self, timeout_ms=None):
|
||||
def refresh_committed_offsets_if_needed(self):
|
||||
"""Fetch committed offsets for assigned partitions."""
|
||||
missing_fetch_positions = set(self._subscription.missing_fetch_positions())
|
||||
try:
|
||||
offsets = self.fetch_committed_offsets(missing_fetch_positions, timeout_ms=timeout_ms)
|
||||
except Errors.KafkaTimeoutError:
|
||||
return False
|
||||
for partition, offset in six.iteritems(offsets):
|
||||
log.debug("Setting offset for partition %s to the committed offset %s", partition, offset.offset)
|
||||
self._subscription.seek(partition, offset.offset)
|
||||
return True
|
||||
if self._subscription.needs_fetch_committed_offsets:
|
||||
offsets = self.fetch_committed_offsets(self._subscription.assigned_partitions())
|
||||
for partition, offset in six.iteritems(offsets):
|
||||
# verify assignment is still active
|
||||
if self._subscription.is_assigned(partition):
|
||||
self._subscription.assignment[partition].committed = offset
|
||||
self._subscription.needs_fetch_committed_offsets = False
|
||||
|
||||
def fetch_committed_offsets(self, partitions, timeout_ms=None):
|
||||
def fetch_committed_offsets(self, partitions):
|
||||
"""Fetch the current committed offsets for specified partitions
|
||||
|
||||
Arguments:
|
||||
@@ -420,45 +401,26 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
|
||||
Returns:
|
||||
dict: {TopicPartition: OffsetAndMetadata}
|
||||
|
||||
Raises:
|
||||
KafkaTimeoutError if timeout_ms provided
|
||||
"""
|
||||
if not partitions:
|
||||
return {}
|
||||
|
||||
future_key = frozenset(partitions)
|
||||
timer = Timer(timeout_ms)
|
||||
while True:
|
||||
if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms):
|
||||
timer.maybe_raise()
|
||||
self.ensure_coordinator_ready()
|
||||
|
||||
# contact coordinator to fetch committed offsets
|
||||
if future_key in self._offset_fetch_futures:
|
||||
future = self._offset_fetch_futures[future_key]
|
||||
else:
|
||||
future = self._send_offset_fetch_request(partitions)
|
||||
self._offset_fetch_futures[future_key] = future
|
||||
future = self._send_offset_fetch_request(partitions)
|
||||
self._client.poll(future=future)
|
||||
|
||||
self._client.poll(future=future, timeout_ms=timer.timeout_ms)
|
||||
if future.succeeded():
|
||||
return future.value
|
||||
|
||||
if future.is_done:
|
||||
del self._offset_fetch_futures[future_key]
|
||||
if not future.retriable():
|
||||
raise future.exception # pylint: disable-msg=raising-bad-type
|
||||
|
||||
if future.succeeded():
|
||||
return future.value
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
|
||||
elif not future.retriable():
|
||||
raise future.exception # pylint: disable-msg=raising-bad-type
|
||||
|
||||
# future failed but is retriable, or is not done yet
|
||||
if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']:
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
else:
|
||||
time.sleep(timer.timeout_ms / 1000)
|
||||
timer.maybe_raise()
|
||||
|
||||
def close(self, autocommit=True, timeout_ms=None):
|
||||
def close(self, autocommit=True):
|
||||
"""Close the coordinator, leave the current group,
|
||||
and reset local generation / member_id.
|
||||
|
||||
@@ -469,14 +431,14 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
"""
|
||||
try:
|
||||
if autocommit:
|
||||
self._maybe_auto_commit_offsets_sync(timeout_ms=timeout_ms)
|
||||
self._maybe_auto_commit_offsets_sync()
|
||||
finally:
|
||||
super(ConsumerCoordinator, self).close(timeout_ms=timeout_ms)
|
||||
super(ConsumerCoordinator, self).close()
|
||||
|
||||
def _invoke_completed_offset_commit_callbacks(self):
|
||||
while self.completed_offset_commits:
|
||||
callback, offsets, res_or_exc = self.completed_offset_commits.popleft()
|
||||
callback(offsets, res_or_exc)
|
||||
callback, offsets, exception = self.completed_offset_commits.popleft()
|
||||
callback(offsets, exception)
|
||||
|
||||
def commit_offsets_async(self, offsets, callback=None):
|
||||
"""Commit specific offsets asynchronously.
|
||||
@@ -516,18 +478,18 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
return future
|
||||
|
||||
def _do_commit_offsets_async(self, offsets, callback=None):
|
||||
if self.config['api_version'] < (0, 8, 1):
|
||||
raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker')
|
||||
assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
|
||||
assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
|
||||
assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
|
||||
offsets.values()))
|
||||
if callback is None:
|
||||
callback = self.config['default_offset_commit_callback']
|
||||
self._subscription.needs_fetch_committed_offsets = True
|
||||
future = self._send_offset_commit_request(offsets)
|
||||
future.add_both(lambda res: self.completed_offset_commits.appendleft((callback, offsets, res)))
|
||||
return future
|
||||
|
||||
def commit_offsets_sync(self, offsets, timeout_ms=None):
|
||||
def commit_offsets_sync(self, offsets):
|
||||
"""Commit specific offsets synchronously.
|
||||
|
||||
This method will retry until the commit completes successfully or an
|
||||
@@ -538,8 +500,7 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
|
||||
Raises error on failure
|
||||
"""
if self.config['api_version'] < (0, 8, 1):
|
||||
raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker')
|
||||
assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
|
||||
assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
|
||||
assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
|
||||
offsets.values()))
|
||||
@@ -547,31 +508,24 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
if not offsets:
|
||||
return
|
||||
|
||||
timer = Timer(timeout_ms)
|
||||
while True:
|
||||
self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms)
|
||||
self.ensure_coordinator_ready()
|
||||
|
||||
future = self._send_offset_commit_request(offsets)
|
||||
self._client.poll(future=future, timeout_ms=timer.timeout_ms)
|
||||
self._client.poll(future=future)
|
||||
|
||||
if future.is_done:
|
||||
if future.succeeded():
|
||||
return future.value
|
||||
if future.succeeded():
|
||||
return future.value
|
||||
|
||||
elif not future.retriable():
|
||||
raise future.exception # pylint: disable-msg=raising-bad-type
|
||||
if not future.retriable():
|
||||
raise future.exception # pylint: disable-msg=raising-bad-type
|
||||
|
||||
# future failed but is retriable, or it is still pending
|
||||
if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']:
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
else:
|
||||
time.sleep(timer.timeout_ms / 1000)
|
||||
timer.maybe_raise()
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
|
||||
def _maybe_auto_commit_offsets_sync(self, timeout_ms=None):
|
||||
def _maybe_auto_commit_offsets_sync(self):
|
||||
if self.config['enable_auto_commit']:
|
||||
try:
|
||||
self.commit_offsets_sync(self._subscription.all_consumed_offsets(), timeout_ms=timeout_ms)
|
||||
self.commit_offsets_sync(self._subscription.all_consumed_offsets())
|
||||
|
||||
# The three main group membership errors are known and should not
|
||||
# require a stacktrace -- just a warning
|
||||
@@ -599,8 +553,7 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
Returns:
|
||||
Future: indicating whether the commit was successful or not
|
||||
"""
|
||||
if self.config['api_version'] < (0, 8, 1):
|
||||
raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker')
|
||||
assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
|
||||
assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
|
||||
assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
|
||||
offsets.values()))
|
||||
@@ -610,46 +563,31 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
|
||||
node_id = self.coordinator()
|
||||
if node_id is None:
|
||||
return Future().failure(Errors.CoordinatorNotAvailableError)
|
||||
return Future().failure(Errors.GroupCoordinatorNotAvailableError)
|
||||
|
||||
# Verify node is ready
|
||||
if not self._client.ready(node_id, metadata_priority=False):
|
||||
log.debug("Node %s not ready -- failing offset commit request",
|
||||
node_id)
|
||||
return Future().failure(Errors.NodeNotReadyError)
|
||||
|
||||
# create the offset commit request
|
||||
offset_data = collections.defaultdict(dict)
|
||||
for tp, offset in six.iteritems(offsets):
|
||||
offset_data[tp.topic][tp.partition] = offset
|
||||
|
||||
version = self._client.api_version(OffsetCommitRequest, max_version=6)
|
||||
if version > 1 and self._subscription.partitions_auto_assigned():
|
||||
generation = self.generation_if_stable()
|
||||
if self._subscription.partitions_auto_assigned():
|
||||
generation = self.generation()
|
||||
else:
|
||||
generation = Generation.NO_GENERATION
|
||||
|
||||
# if the generation is None, we are not part of an active group
|
||||
# (and we expect to be). The only thing we can do is fail the commit
|
||||
# and let the user rejoin the group in poll()
|
||||
if generation is None:
|
||||
log.info("Failing OffsetCommit request since the consumer is not part of an active group")
|
||||
if self.rebalance_in_progress():
|
||||
# if the client knows it is already rebalancing, we can use RebalanceInProgressError instead of
|
||||
# CommitFailedError to indicate this is not a fatal error
|
||||
return Future().failure(Errors.RebalanceInProgressError(
|
||||
"Offset commit cannot be completed since the"
|
||||
" consumer is undergoing a rebalance for auto partition assignment. You can try completing the rebalance"
|
||||
" by calling poll() and then retry the operation."))
|
||||
else:
|
||||
return Future().failure(Errors.CommitFailedError(
|
||||
"Offset commit cannot be completed since the"
|
||||
" consumer is not part of an active group for auto partition assignment; it is likely that the consumer"
|
||||
" was kicked out of the group."))
|
||||
if self.config['api_version'] >= (0, 9) and generation is None:
|
||||
return Future().failure(Errors.CommitFailedError())
|
||||
|
||||
if version == 0:
|
||||
request = OffsetCommitRequest[version](
|
||||
if self.config['api_version'] >= (0, 9):
|
||||
request = OffsetCommitRequest[2](
|
||||
self.group_id,
|
||||
generation.generation_id,
|
||||
generation.member_id,
|
||||
OffsetCommitRequest[2].DEFAULT_RETENTION_TIME,
|
||||
[(
|
||||
topic, [(
|
||||
partition,
|
||||
@@ -658,28 +596,21 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
) for partition, offset in six.iteritems(partitions)]
|
||||
) for topic, partitions in six.iteritems(offset_data)]
|
||||
)
|
||||
elif version == 1:
|
||||
request = OffsetCommitRequest[version](
|
||||
self.group_id,
|
||||
# This api version was only used in v0.8.2, prior to join group apis
|
||||
# so this always ends up as NO_GENERATION
|
||||
generation.generation_id,
|
||||
generation.member_id,
|
||||
elif self.config['api_version'] >= (0, 8, 2):
|
||||
request = OffsetCommitRequest[1](
|
||||
self.group_id, -1, '',
|
||||
[(
|
||||
topic, [(
|
||||
partition,
|
||||
offset.offset,
|
||||
-1, # timestamp, unused
|
||||
-1,
|
||||
offset.metadata
|
||||
) for partition, offset in six.iteritems(partitions)]
|
||||
) for topic, partitions in six.iteritems(offset_data)]
|
||||
)
|
||||
elif version <= 4:
|
||||
request = OffsetCommitRequest[version](
|
||||
elif self.config['api_version'] >= (0, 8, 1):
|
||||
request = OffsetCommitRequest[0](
|
||||
self.group_id,
|
||||
generation.generation_id,
|
||||
generation.member_id,
|
||||
OffsetCommitRequest[version].DEFAULT_RETENTION_TIME,
|
||||
[(
|
||||
topic, [(
|
||||
partition,
|
||||
@@ -688,33 +619,6 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
) for partition, offset in six.iteritems(partitions)]
|
||||
) for topic, partitions in six.iteritems(offset_data)]
|
||||
)
|
||||
elif version <= 5:
|
||||
request = OffsetCommitRequest[version](
|
||||
self.group_id,
|
||||
generation.generation_id,
|
||||
generation.member_id,
|
||||
[(
|
||||
topic, [(
|
||||
partition,
|
||||
offset.offset,
|
||||
offset.metadata
|
||||
) for partition, offset in six.iteritems(partitions)]
|
||||
) for topic, partitions in six.iteritems(offset_data)]
|
||||
)
|
||||
else:
|
||||
request = OffsetCommitRequest[version](
|
||||
self.group_id,
|
||||
generation.generation_id,
|
||||
generation.member_id,
|
||||
[(
|
||||
topic, [(
|
||||
partition,
|
||||
offset.offset,
|
||||
offset.leader_epoch,
|
||||
offset.metadata
|
||||
) for partition, offset in six.iteritems(partitions)]
|
||||
) for topic, partitions in six.iteritems(offset_data)]
|
||||
)
|
||||
|
||||
log.debug("Sending offset-commit request with %s for group %s to %s",
|
||||
offsets, self.group_id, node_id)
|
||||
@@ -726,10 +630,8 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
return future
|
||||
|
||||
def _handle_offset_commit_response(self, offsets, future, send_time, response):
|
||||
log.debug("Received OffsetCommitResponse: %s", response)
|
||||
# TODO look at adding request_latency_ms to response (like java kafka)
|
||||
if self._consumer_sensors:
|
||||
self._consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
|
||||
self.consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
|
||||
unauthorized_topics = set()
|
||||
|
||||
for topic, partitions in response.topics:
|
||||
@@ -741,6 +643,8 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
if error_type is Errors.NoError:
|
||||
log.debug("Group %s committed offset %s for partition %s",
|
||||
self.group_id, offset, tp)
|
||||
if self._subscription.is_assigned(tp):
|
||||
self._subscription.assignment[tp].committed = offset
|
||||
elif error_type is Errors.GroupAuthorizationFailedError:
|
||||
log.error("Not authorized to commit offsets for group %s",
|
||||
self.group_id)
|
||||
@@ -755,38 +659,29 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
" %s", self.group_id, tp, error_type.__name__)
|
||||
future.failure(error_type())
|
||||
return
|
||||
elif error_type is Errors.CoordinatorLoadInProgressError:
|
||||
elif error_type is Errors.GroupLoadInProgressError:
|
||||
# just retry
|
||||
log.debug("OffsetCommit for group %s failed: %s",
|
||||
self.group_id, error_type.__name__)
|
||||
future.failure(error_type(self.group_id))
|
||||
return
|
||||
elif error_type in (Errors.CoordinatorNotAvailableError,
|
||||
Errors.NotCoordinatorError,
|
||||
elif error_type in (Errors.GroupCoordinatorNotAvailableError,
|
||||
Errors.NotCoordinatorForGroupError,
|
||||
Errors.RequestTimedOutError):
|
||||
log.debug("OffsetCommit for group %s failed: %s",
|
||||
self.group_id, error_type.__name__)
|
||||
self.coordinator_dead(error_type())
|
||||
future.failure(error_type(self.group_id))
|
||||
return
|
||||
elif error_type is Errors.RebalanceInProgressError:
|
||||
# Consumer never tries to commit offset in between join-group and sync-group,
|
||||
# and hence on broker-side it is not expected to see a commit offset request
|
||||
# during CompletingRebalance phase; if it ever happens then broker would return
|
||||
# this error. In this case we should just treat as a fatal CommitFailed exception.
|
||||
# However, we do not need to reset generations and just request re-join, such that
|
||||
# if the caller decides to proceed and poll, it would still try to proceed and re-join normally.
|
||||
self.request_rejoin()
|
||||
future.failure(Errors.CommitFailedError(error_type()))
|
||||
return
|
||||
elif error_type in (Errors.UnknownMemberIdError,
|
||||
Errors.IllegalGenerationError):
|
||||
# need reset generation and re-join group
|
||||
Errors.IllegalGenerationError,
|
||||
Errors.RebalanceInProgressError):
|
||||
# need to re-join group
|
||||
error = error_type(self.group_id)
|
||||
log.warning("OffsetCommit for group %s failed: %s",
|
||||
self.group_id, error)
|
||||
log.debug("OffsetCommit for group %s failed: %s",
|
||||
self.group_id, error)
|
||||
self.reset_generation()
|
||||
future.failure(Errors.CommitFailedError(error_type()))
|
||||
future.failure(Errors.CommitFailedError())
|
||||
return
|
||||
else:
|
||||
log.error("Group %s failed to commit partition %s at offset"
|
||||
@@ -814,18 +709,17 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
Returns:
|
||||
Future: resolves to dict of offsets: {TopicPartition: OffsetAndMetadata}
|
||||
"""
|
||||
if self.config['api_version'] < (0, 8, 1):
|
||||
raise Errors.UnsupportedVersionError('OffsetFetchRequest requires 0.8.1+ broker')
|
||||
assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
|
||||
assert all(map(lambda k: isinstance(k, TopicPartition), partitions))
|
||||
if not partitions:
|
||||
return Future().success({})
|
||||
|
||||
node_id = self.coordinator()
|
||||
if node_id is None:
|
||||
return Future().failure(Errors.CoordinatorNotAvailableError)
|
||||
return Future().failure(Errors.GroupCoordinatorNotAvailableError)
|
||||
|
||||
# Verify node is ready
|
||||
if not self._client.ready(node_id, metadata_priority=False):
|
||||
if not self._client.ready(node_id):
|
||||
log.debug("Node %s not ready -- failing offset fetch request",
|
||||
node_id)
|
||||
return Future().failure(Errors.NodeNotReadyError)
|
||||
@@ -837,13 +731,16 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
for tp in partitions:
|
||||
topic_partitions[tp.topic].add(tp.partition)
|
||||
|
||||
version = self._client.api_version(OffsetFetchRequest, max_version=5)
|
||||
# Starting in version 2, the request can contain a null topics array to indicate that offsets should be fetched
|
||||
# TODO: support
|
||||
request = OffsetFetchRequest[version](
|
||||
self.group_id,
|
||||
list(topic_partitions.items())
|
||||
)
|
||||
if self.config['api_version'] >= (0, 8, 2):
|
||||
request = OffsetFetchRequest[1](
|
||||
self.group_id,
|
||||
list(topic_partitions.items())
|
||||
)
|
||||
else:
|
||||
request = OffsetFetchRequest[0](
|
||||
self.group_id,
|
||||
list(topic_partitions.items())
|
||||
)
|
||||
|
||||
# send the request with a callback
|
||||
future = Future()
|
||||
@@ -853,46 +750,21 @@ class ConsumerCoordinator(BaseCoordinator):
|
||||
return future
|
||||
|
||||
    def _handle_offset_fetch_response(self, future, response):
        log.debug("Received OffsetFetchResponse: %s", response)
        if response.API_VERSION >= 2 and response.error_code != Errors.NoError.errno:
            error_type = Errors.for_code(response.error_code)
            log.debug("Offset fetch failed: %s", error_type.__name__)
            error = error_type()
            if error_type is Errors.CoordinatorLoadInProgressError:
                # Retry
                future.failure(error)
            elif error_type is Errors.NotCoordinatorError:
                # re-discover the coordinator and retry
                self.coordinator_dead(error)
                future.failure(error)
            elif error_type is Errors.GroupAuthorizationFailedError:
                future.failure(error)
            else:
                log.error("Unknown error fetching offsets: %s", error)
                future.failure(error)
            return

        offsets = {}
        for topic, partitions in response.topics:
            for partition_data in partitions:
                partition, offset = partition_data[:2]
                if response.API_VERSION >= 5:
                    leader_epoch, metadata, error_code = partition_data[2:]
                else:
                    metadata, error_code = partition_data[2:]
                    leader_epoch = -1  # noqa: F841
            for partition, offset, metadata, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is not Errors.NoError:
                    error = error_type()
                    log.debug("Group %s failed to fetch offset for partition"
                              " %s: %s", self.group_id, tp, error)
                    if error_type is Errors.CoordinatorLoadInProgressError:
                    if error_type is Errors.GroupLoadInProgressError:
                        # just retry
                        future.failure(error)
                    elif error_type is Errors.NotCoordinatorError:
                    elif error_type is Errors.NotCoordinatorForGroupError:
                        # re-discover the coordinator and retry
                        self.coordinator_dead(error)
                        self.coordinator_dead(error_type())
                        future.failure(error)
                    elif error_type is Errors.UnknownTopicOrPartitionError:
                        log.warning("OffsetFetchRequest -- unknown topic %s"
@@ -907,41 +779,34 @@ class ConsumerCoordinator(BaseCoordinator):
                elif offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    # TODO: save leader_epoch
                    offsets[tp] = OffsetAndMetadata(offset, metadata, -1)
                    offsets[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug("Group %s has no committed offset for partition"
                              " %s", self.group_id, tp)
        future.success(offsets)

    def _default_offset_commit_callback(self, offsets, res_or_exc):
        if isinstance(res_or_exc, Exception):
    def _default_offset_commit_callback(self, offsets, exception):
        if exception is not None:
            log.error("Offset commit failed: %s", exception)

    def _commit_offsets_async_on_complete(self, offsets, exception):
        if exception is not None:
            log.warning("Auto offset commit failed for group %s: %s",
                        self.group_id, res_or_exc)
                        self.group_id, exception)
            if getattr(exception, 'retriable', False):
                self.next_auto_commit_deadline = min(time.time() + self.config['retry_backoff_ms'] / 1000, self.next_auto_commit_deadline)
        else:
            log.debug("Completed autocommit of offsets %s for group %s",
                      offsets, self.group_id)

    def _commit_offsets_async_on_complete(self, offsets, res_or_exc):
        if isinstance(res_or_exc, Exception) and getattr(res_or_exc, 'retriable', False):
            self.next_auto_commit_deadline = min(time.time() + self.config['retry_backoff_ms'] / 1000, self.next_auto_commit_deadline)
        self.config['default_offset_commit_callback'](offsets, res_or_exc)

    def _maybe_auto_commit_offsets_async(self):
        if self.config['enable_auto_commit']:
            if self.coordinator_unknown():
                self.next_auto_commit_deadline = time.time() + self.config['retry_backoff_ms'] / 1000
            elif time.time() > self.next_auto_commit_deadline:
                self.next_auto_commit_deadline = time.time() + self.auto_commit_interval
                self._do_auto_commit_offsets_async()

    def maybe_auto_commit_offsets_now(self):
        if self.config['enable_auto_commit'] and not self.coordinator_unknown():
            self._do_auto_commit_offsets_async()

    def _do_auto_commit_offsets_async(self):
        self.commit_offsets_async(self._subscription.all_consumed_offsets(),
                                  self._commit_offsets_async_on_complete)
        self.commit_offsets_async(self._subscription.all_consumed_offsets(),
                                  self._commit_offsets_async_on_complete)


class ConsumerCoordinatorMetrics(object):

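For context on the callback-signature change above, here is a minimal sketch of a user-supplied commit callback. This is an assumption-laden example, not part of the commit: depending on the kafka-python version installed, the callback may receive (offsets, exception_or_None) or (offsets, response_or_exception), so it defensively handles both; the topic and group names are made up.

from kafka import KafkaConsumer

def on_commit(offsets, res_or_exc):
    # Treat anything exception-like as a failure; otherwise assume the commit succeeded.
    if isinstance(res_or_exc, Exception):
        print('commit failed for %s: %s' % (offsets, res_or_exc))
    else:
        print('committed %s' % (offsets,))

consumer = KafkaConsumer('example-topic',
                         group_id='example-group',
                         enable_auto_commit=False,
                         bootstrap_servers='localhost:9092')
for msg in consumer:
    # process msg, then commit asynchronously with the callback above
    consumer.commit_async(callback=on_commit)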
@@ -1,13 +1,8 @@
from __future__ import absolute_import, division

import copy
import logging
import time

from kafka.errors import KafkaConfigurationError

log = logging.getLogger(__name__)


class Heartbeat(object):
    DEFAULT_CONFIG = {
@@ -25,13 +20,9 @@ class Heartbeat(object):
                self.config[key] = configs[key]

        if self.config['group_id'] is not None:
            if self.config['heartbeat_interval_ms'] >= self.config['session_timeout_ms']:
                raise KafkaConfigurationError('Heartbeat interval must be lower than the session timeout (%s v %s)' % (
                    self.config['heartbeat_interval_ms'], self.config['session_timeout_ms']))
            if self.config['heartbeat_interval_ms'] > (self.config['session_timeout_ms'] / 3):
                log.warning('heartbeat_interval_ms is high relative to session_timeout_ms (%s v %s).'
                            ' Recommend heartbeat interval less than 1/3rd of session timeout',
                            self.config['heartbeat_interval_ms'], self.config['session_timeout_ms'])
            assert (self.config['heartbeat_interval_ms']
                    <= self.config['session_timeout_ms']), (
                'Heartbeat interval must be lower than the session timeout')

        self.last_send = -1 * float('inf')
        self.last_receive = -1 * float('inf')
@@ -75,10 +66,3 @@ class Heartbeat(object):

    def poll_timeout_expired(self):
        return (time.time() - self.last_poll) > (self.config['max_poll_interval_ms'] / 1000)

    def __str__(self):
        return ("<Heartbeat group_id={group_id}"
                " heartbeat_interval_ms={heartbeat_interval_ms}"
                " session_timeout_ms={session_timeout_ms}"
                " max_poll_interval_ms={max_poll_interval_ms}"
                " retry_backoff_ms={retry_backoff_ms}>").format(**self.config)

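A minimal sketch of consumer settings that satisfy the constraint enforced above (heartbeat interval well below the session timeout, here roughly one third). The values are illustrative only and not taken from the commit.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'example-topic',
    bootstrap_servers='localhost:9092',
    group_id='example-group',
    session_timeout_ms=30000,      # group coordinator may evict the member after 30s of silence
    heartbeat_interval_ms=10000,   # no more than 1/3 of session_timeout_ms, as recommended above
)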
@@ -16,39 +16,23 @@ class KafkaError(RuntimeError):
|
||||
super(KafkaError, self).__str__())
|
||||
|
||||
|
||||
class Cancelled(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class CommitFailedError(KafkaError):
|
||||
def __init__(self, *args):
|
||||
if not args:
|
||||
args = ("Commit cannot be completed since the group has already"
|
||||
" rebalanced and assigned the partitions to another member.",)
|
||||
super(CommitFailedError, self).__init__(*args)
|
||||
class IllegalStateError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class IllegalArgumentError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class IllegalStateError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class IncompatibleBrokerVersion(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class KafkaConfigurationError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class KafkaConnectionError(KafkaError):
|
||||
class NoBrokersAvailable(KafkaError):
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class NodeNotReadyError(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class KafkaProtocolError(KafkaError):
|
||||
retriable = True
|
||||
|
||||
@@ -57,37 +41,20 @@ class CorrelationIdError(KafkaProtocolError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class KafkaTimeoutError(KafkaError):
|
||||
class Cancelled(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class MetadataEmptyBrokerList(KafkaError):
|
||||
class TooManyInFlightRequests(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class NoBrokersAvailable(KafkaError):
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class NoOffsetForPartitionError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class NodeNotReadyError(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
class QuotaViolationError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class StaleMetadata(KafkaError):
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class TooManyInFlightRequests(KafkaError):
|
||||
class MetadataEmptyBrokerList(KafkaError):
|
||||
retriable = True
|
||||
|
||||
|
||||
@@ -95,10 +62,33 @@ class UnrecognizedBrokerVersion(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedCodecError(KafkaError):
|
||||
class IncompatibleBrokerVersion(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class CommitFailedError(KafkaError):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CommitFailedError, self).__init__(
|
||||
"""Commit cannot be completed since the group has already
|
||||
rebalanced and assigned the partitions to another member.
|
||||
This means that the time between subsequent calls to poll()
|
||||
was longer than the configured max_poll_interval_ms, which
|
||||
typically implies that the poll loop is spending too much
|
||||
time message processing. You can address this either by
|
||||
increasing the rebalance timeout with max_poll_interval_ms,
|
||||
or by reducing the maximum size of batches returned in poll()
|
||||
with max_poll_records.
|
||||
""", *args, **kwargs)
|
||||
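The error text above points at two tuning knobs. Below is a hedged sketch of a consumer configured to avoid CommitFailedError by allowing more time between polls and fetching smaller batches; the numbers are arbitrary examples and max_poll_interval_ms is only honored by kafka-python versions that expose it.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'example-topic',
    bootstrap_servers='localhost:9092',
    group_id='example-group',
    max_poll_interval_ms=600000,  # allow up to 10 minutes of processing between poll() calls
    max_poll_records=100,         # return fewer records per poll() so each batch finishes sooner
)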
|
||||
|
||||
class AuthenticationMethodNotSupported(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class AuthenticationFailedError(KafkaError):
|
||||
retriable = False
|
||||
|
||||
|
||||
class BrokerResponseError(KafkaError):
|
||||
errno = None
|
||||
message = None
|
||||
@@ -111,10 +101,6 @@ class BrokerResponseError(KafkaError):
|
||||
super(BrokerResponseError, self).__str__())
|
||||
|
||||
|
||||
class AuthorizationError(BrokerResponseError):
|
||||
pass
|
||||
|
||||
|
||||
class NoError(BrokerResponseError):
|
||||
errno = 0
|
||||
message = 'NO_ERROR'
|
||||
@@ -134,14 +120,14 @@ class OffsetOutOfRangeError(BrokerResponseError):
|
||||
' maintained by the server for the given topic/partition.')
|
||||
|
||||
|
||||
class CorruptRecordError(BrokerResponseError):
|
||||
class CorruptRecordException(BrokerResponseError):
|
||||
errno = 2
|
||||
message = 'CORRUPT_MESSAGE'
|
||||
description = ('This message has failed its CRC checksum, exceeds the'
|
||||
' valid size, or is otherwise corrupt.')
|
||||
|
||||
# Backward compatibility
|
||||
CorruptRecordException = CorruptRecordError
|
||||
InvalidMessageError = CorruptRecordException
|
||||
|
||||
|
||||
class UnknownTopicOrPartitionError(BrokerResponseError):
|
||||
@@ -200,8 +186,7 @@ class ReplicaNotAvailableError(BrokerResponseError):
|
||||
message = 'REPLICA_NOT_AVAILABLE'
|
||||
description = ('If replica is expected on a broker, but is not (this can be'
|
||||
' safely ignored).')
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class MessageSizeTooLargeError(BrokerResponseError):
|
||||
errno = 10
|
||||
@@ -225,35 +210,39 @@ class OffsetMetadataTooLargeError(BrokerResponseError):
|
||||
' offset metadata.')
|
||||
|
||||
|
||||
class NetworkExceptionError(BrokerResponseError):
|
||||
# TODO is this deprecated? https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ErrorCodes
|
||||
class StaleLeaderEpochCodeError(BrokerResponseError):
|
||||
errno = 13
|
||||
message = 'NETWORK_EXCEPTION'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
message = 'STALE_LEADER_EPOCH_CODE'
|
||||
|
||||
|
||||
class CoordinatorLoadInProgressError(BrokerResponseError):
|
||||
class GroupLoadInProgressError(BrokerResponseError):
|
||||
errno = 14
|
||||
message = 'COORDINATOR_LOAD_IN_PROGRESS'
|
||||
description = ('The broker returns this error code for txn or group requests,'
|
||||
' when the coordinator is loading and hence cant process requests')
|
||||
message = 'OFFSETS_LOAD_IN_PROGRESS'
|
||||
description = ('The broker returns this error code for an offset fetch'
|
||||
' request if it is still loading offsets (after a leader'
|
||||
' change for that offsets topic partition), or in response'
|
||||
' to group membership requests (such as heartbeats) when'
|
||||
' group metadata is being loaded by the coordinator.')
|
||||
retriable = True
|
||||
|
||||
|
||||
class CoordinatorNotAvailableError(BrokerResponseError):
|
||||
class GroupCoordinatorNotAvailableError(BrokerResponseError):
|
||||
errno = 15
|
||||
message = 'COORDINATOR_NOT_AVAILABLE'
|
||||
description = ('The broker returns this error code for consumer and transaction'
|
||||
message = 'CONSUMER_COORDINATOR_NOT_AVAILABLE'
|
||||
description = ('The broker returns this error code for group coordinator'
|
||||
' requests, offset commits, and most group management'
|
||||
' requests if the offsets topic has not yet been created, or'
|
||||
' if the group/txn coordinator is not active.')
|
||||
' if the group coordinator is not active.')
|
||||
retriable = True
|
||||
|
||||
|
||||
class NotCoordinatorError(BrokerResponseError):
|
||||
class NotCoordinatorForGroupError(BrokerResponseError):
|
||||
errno = 16
|
||||
message = 'NOT_COORDINATOR'
|
||||
description = ('The broker returns this error code if it is not the correct'
|
||||
' coordinator for the specified consumer or transaction group')
|
||||
message = 'NOT_COORDINATOR_FOR_CONSUMER'
|
||||
description = ('The broker returns this error code if it receives an offset'
|
||||
' fetch or commit request for a group that it is not a'
|
||||
' coordinator for.')
|
||||
retriable = True
|
||||
|
||||
|
||||
@@ -350,21 +339,21 @@ class InvalidCommitOffsetSizeError(BrokerResponseError):
|
||||
' because of oversize metadata.')
|
||||
|
||||
|
||||
class TopicAuthorizationFailedError(AuthorizationError):
|
||||
class TopicAuthorizationFailedError(BrokerResponseError):
|
||||
errno = 29
|
||||
message = 'TOPIC_AUTHORIZATION_FAILED'
|
||||
description = ('Returned by the broker when the client is not authorized to'
|
||||
' access the requested topic.')
|
||||
|
||||
|
||||
class GroupAuthorizationFailedError(AuthorizationError):
|
||||
class GroupAuthorizationFailedError(BrokerResponseError):
|
||||
errno = 30
|
||||
message = 'GROUP_AUTHORIZATION_FAILED'
|
||||
description = ('Returned by the broker when the client is not authorized to'
|
||||
' access a particular groupId.')
|
||||
|
||||
|
||||
class ClusterAuthorizationFailedError(AuthorizationError):
|
||||
class ClusterAuthorizationFailedError(BrokerResponseError):
|
||||
errno = 31
|
||||
message = 'CLUSTER_AUTHORIZATION_FAILED'
|
||||
description = ('Returned by the broker when the client is not authorized to'
|
||||
@@ -452,597 +441,65 @@ class PolicyViolationError(BrokerResponseError):
|
||||
errno = 44
|
||||
message = 'POLICY_VIOLATION'
|
||||
description = 'Request parameters do not satisfy the configured policy.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class OutOfOrderSequenceNumberError(BrokerResponseError):
|
||||
errno = 45
|
||||
message = 'OUT_OF_ORDER_SEQUENCE_NUMBER'
|
||||
description = 'The broker received an out of order sequence number.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DuplicateSequenceNumberError(BrokerResponseError):
|
||||
errno = 46
|
||||
message = 'DUPLICATE_SEQUENCE_NUMBER'
|
||||
description = 'The broker received a duplicate sequence number.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidProducerEpochError(BrokerResponseError):
|
||||
errno = 47
|
||||
message = 'INVALID_PRODUCER_EPOCH'
|
||||
description = 'Producer attempted to produce with an old epoch.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidTxnStateError(BrokerResponseError):
|
||||
errno = 48
|
||||
message = 'INVALID_TXN_STATE'
|
||||
description = 'The producer attempted a transactional operation in an invalid state.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidProducerIdMappingError(BrokerResponseError):
|
||||
errno = 49
|
||||
message = 'INVALID_PRODUCER_ID_MAPPING'
|
||||
description = 'The producer attempted to use a producer id which is not currently assigned to its transactional id.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidTransactionTimeoutError(BrokerResponseError):
|
||||
errno = 50
|
||||
message = 'INVALID_TRANSACTION_TIMEOUT'
|
||||
description = 'The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms).'
|
||||
retriable = False
|
||||
|
||||
|
||||
class ConcurrentTransactionsError(BrokerResponseError):
|
||||
errno = 51
|
||||
message = 'CONCURRENT_TRANSACTIONS'
|
||||
description = 'The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class TransactionCoordinatorFencedError(BrokerResponseError):
|
||||
errno = 52
|
||||
message = 'TRANSACTION_COORDINATOR_FENCED'
|
||||
description = 'Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class TransactionalIdAuthorizationFailedError(AuthorizationError):
|
||||
errno = 53
|
||||
message = 'TRANSACTIONAL_ID_AUTHORIZATION_FAILED'
|
||||
description = 'Transactional Id authorization failed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class SecurityDisabledError(BrokerResponseError):
|
||||
errno = 54
|
||||
message = 'SECURITY_DISABLED'
|
||||
description = 'Security features are disabled.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class OperationNotAttemptedError(BrokerResponseError):
|
||||
errno = 55
|
||||
message = 'OPERATION_NOT_ATTEMPTED'
|
||||
description = 'The broker did not attempt to execute this operation. This may happen for batched RPCs where some operations in the batch failed, causing the broker to respond without trying the rest.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class KafkaStorageError(BrokerResponseError):
|
||||
errno = 56
|
||||
message = 'KAFKA_STORAGE_ERROR'
|
||||
description = 'Disk error when trying to access log file on the disk.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class LogDirNotFoundError(BrokerResponseError):
|
||||
errno = 57
|
||||
message = 'LOG_DIR_NOT_FOUND'
|
||||
description = 'The user-specified log directory is not found in the broker config.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class SaslAuthenticationFailedError(BrokerResponseError):
|
||||
errno = 58
|
||||
message = 'SASL_AUTHENTICATION_FAILED'
|
||||
description = 'SASL Authentication failed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnknownProducerIdError(BrokerResponseError):
|
||||
errno = 59
|
||||
message = 'UNKNOWN_PRODUCER_ID'
|
||||
description = 'This exception is raised by the broker if it could not locate the producer metadata associated with the producerId in question. This could happen if, for instance, the producer\'s records were deleted because their retention time had elapsed. Once the last records of the producerId are removed, the producer\'s metadata is removed from the broker, and future appends by the producer will return this exception.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class ReassignmentInProgressError(BrokerResponseError):
|
||||
errno = 60
|
||||
message = 'REASSIGNMENT_IN_PROGRESS'
|
||||
description = 'A partition reassignment is in progress.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenAuthDisabledError(BrokerResponseError):
|
||||
errno = 61
|
||||
message = 'DELEGATION_TOKEN_AUTH_DISABLED'
|
||||
description = 'Delegation Token feature is not enabled.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenNotFoundError(BrokerResponseError):
|
||||
errno = 62
|
||||
message = 'DELEGATION_TOKEN_NOT_FOUND'
|
||||
description = 'Delegation Token is not found on server.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenOwnerMismatchError(BrokerResponseError):
|
||||
errno = 63
|
||||
message = 'DELEGATION_TOKEN_OWNER_MISMATCH'
|
||||
description = 'Specified Principal is not valid Owner/Renewer.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenRequestNotAllowedError(BrokerResponseError):
|
||||
errno = 64
|
||||
message = 'DELEGATION_TOKEN_REQUEST_NOT_ALLOWED'
|
||||
description = 'Delegation Token requests are not allowed on PLAINTEXT/1-way SSL channels and on delegation token authenticated channels.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenAuthorizationFailedError(AuthorizationError):
|
||||
errno = 65
|
||||
message = 'DELEGATION_TOKEN_AUTHORIZATION_FAILED'
|
||||
description = 'Delegation Token authorization failed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DelegationTokenExpiredError(BrokerResponseError):
|
||||
errno = 66
|
||||
message = 'DELEGATION_TOKEN_EXPIRED'
|
||||
description = 'Delegation Token is expired.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidPrincipalTypeError(BrokerResponseError):
|
||||
errno = 67
|
||||
message = 'INVALID_PRINCIPAL_TYPE'
|
||||
description = 'Supplied principalType is not supported.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class NonEmptyGroupError(BrokerResponseError):
|
||||
errno = 68
|
||||
message = 'NON_EMPTY_GROUP'
|
||||
description = 'The group is not empty.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class GroupIdNotFoundError(BrokerResponseError):
|
||||
errno = 69
|
||||
message = 'GROUP_ID_NOT_FOUND'
|
||||
description = 'The group id does not exist.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class FetchSessionIdNotFoundError(BrokerResponseError):
|
||||
errno = 70
|
||||
message = 'FETCH_SESSION_ID_NOT_FOUND'
|
||||
description = 'The fetch session ID was not found.'
|
||||
retriable = True
|
||||
class KafkaUnavailableError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidFetchSessionEpochError(BrokerResponseError):
|
||||
errno = 71
|
||||
message = 'INVALID_FETCH_SESSION_EPOCH'
|
||||
description = 'The fetch session epoch is invalid.'
|
||||
retriable = True
|
||||
class KafkaTimeoutError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class ListenerNotFoundError(BrokerResponseError):
|
||||
errno = 72
|
||||
message = 'LISTENER_NOT_FOUND'
|
||||
description = 'There is no listener on the leader broker that matches the listener on which metadata request was processed.'
|
||||
class FailedPayloadsError(KafkaError):
|
||||
def __init__(self, payload, *args):
|
||||
super(FailedPayloadsError, self).__init__(*args)
|
||||
self.payload = payload
|
||||
|
||||
|
||||
class KafkaConnectionError(KafkaError):
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class TopicDeletionDisabledError(BrokerResponseError):
|
||||
errno = 73
|
||||
message = 'TOPIC_DELETION_DISABLED'
|
||||
description = 'Topic deletion is disabled.'
|
||||
retriable = False
|
||||
class ProtocolError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class FencedLeaderEpochError(BrokerResponseError):
|
||||
errno = 74
|
||||
message = 'FENCED_LEADER_EPOCH'
|
||||
description = 'The leader epoch in the request is older than the epoch on the broker.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
class UnsupportedCodecError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class UnknownLeaderEpochError(BrokerResponseError):
|
||||
errno = 75
|
||||
message = 'UNKNOWN_LEADER_EPOCH'
|
||||
description = 'The leader epoch in the request is newer than the epoch on the broker.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
class KafkaConfigurationError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedCompressionTypeError(BrokerResponseError):
|
||||
errno = 76
|
||||
message = 'UNSUPPORTED_COMPRESSION_TYPE'
|
||||
description = 'The requesting client does not support the compression type of given partition.'
|
||||
retriable = False
|
||||
class QuotaViolationError(KafkaError):
|
||||
pass
|
||||
|
||||
|
||||
class StaleBrokerEpochError(BrokerResponseError):
|
||||
errno = 77
|
||||
message = 'STALE_BROKER_EPOCH'
|
||||
description = 'Broker epoch has changed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class OffsetNotAvailableError(BrokerResponseError):
|
||||
errno = 78
|
||||
message = 'OFFSET_NOT_AVAILABLE'
|
||||
description = 'The leader high watermark has not caught up from a recent leader election so the offsets cannot be guaranteed to be monotonically increasing.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class MemberIdRequiredError(BrokerResponseError):
|
||||
errno = 79
|
||||
message = 'MEMBER_ID_REQUIRED'
|
||||
description = 'The group member needs to have a valid member id before actually entering a consumer group.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class PreferredLeaderNotAvailableError(BrokerResponseError):
|
||||
errno = 80
|
||||
message = 'PREFERRED_LEADER_NOT_AVAILABLE'
|
||||
description = 'The preferred leader was not available.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class GroupMaxSizeReachedError(BrokerResponseError):
|
||||
errno = 81
|
||||
message = 'GROUP_MAX_SIZE_REACHED'
|
||||
description = 'The consumer group has reached its max size.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class FencedInstanceIdError(BrokerResponseError):
|
||||
errno = 82
|
||||
message = 'FENCED_INSTANCE_ID'
|
||||
description = 'The broker rejected this static consumer since another consumer with the same group.instance.id has registered with a different member.id.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class EligibleLeadersNotAvailableError(BrokerResponseError):
|
||||
errno = 83
|
||||
message = 'ELIGIBLE_LEADERS_NOT_AVAILABLE'
|
||||
description = 'Eligible topic partition leaders are not available.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class ElectionNotNeededError(BrokerResponseError):
|
||||
errno = 84
|
||||
message = 'ELECTION_NOT_NEEDED'
|
||||
description = 'Leader election not needed for topic partition.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class NoReassignmentInProgressError(BrokerResponseError):
|
||||
errno = 85
|
||||
message = 'NO_REASSIGNMENT_IN_PROGRESS'
|
||||
description = 'No partition reassignment is in progress.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class GroupSubscribedToTopicError(BrokerResponseError):
|
||||
errno = 86
|
||||
message = 'GROUP_SUBSCRIBED_TO_TOPIC'
|
||||
description = 'Deleting offsets of a topic is forbidden while the consumer group is actively subscribed to it.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidRecordError(BrokerResponseError):
|
||||
errno = 87
|
||||
message = 'INVALID_RECORD'
|
||||
description = 'This record has failed the validation on broker and hence will be rejected.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnstableOffsetCommitError(BrokerResponseError):
|
||||
errno = 88
|
||||
message = 'UNSTABLE_OFFSET_COMMIT'
|
||||
description = 'There are unstable offsets that need to be cleared.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class ThrottlingQuotaExceededError(BrokerResponseError):
|
||||
errno = 89
|
||||
message = 'THROTTLING_QUOTA_EXCEEDED'
|
||||
description = 'The throttling quota has been exceeded.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class ProducerFencedError(BrokerResponseError):
|
||||
errno = 90
|
||||
message = 'PRODUCER_FENCED'
|
||||
description = 'There is a newer producer with the same transactionalId which fences the current one.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class ResourceNotFoundError(BrokerResponseError):
|
||||
errno = 91
|
||||
message = 'RESOURCE_NOT_FOUND'
|
||||
description = 'A request illegally referred to a resource that does not exist.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DuplicateResourceError(BrokerResponseError):
|
||||
errno = 92
|
||||
message = 'DUPLICATE_RESOURCE'
|
||||
description = 'A request illegally referred to the same resource twice.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnacceptableCredentialError(BrokerResponseError):
|
||||
errno = 93
|
||||
message = 'UNACCEPTABLE_CREDENTIAL'
|
||||
description = 'Requested credential would not meet criteria for acceptability.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InconsistentVoterSetError(BrokerResponseError):
|
||||
errno = 94
|
||||
message = 'INCONSISTENT_VOTER_SET'
|
||||
description = 'Indicates that the either the sender or recipient of a voter-only request is not one of the expected voters.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidUpdateVersionError(BrokerResponseError):
|
||||
errno = 95
|
||||
message = 'INVALID_UPDATE_VERSION'
|
||||
description = 'The given update version was invalid.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class FeatureUpdateFailedError(BrokerResponseError):
|
||||
errno = 96
|
||||
message = 'FEATURE_UPDATE_FAILED'
|
||||
description = 'Unable to update finalized features due to an unexpected server error.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class PrincipalDeserializationFailureError(BrokerResponseError):
|
||||
errno = 97
|
||||
message = 'PRINCIPAL_DESERIALIZATION_FAILURE'
|
||||
description = 'Request principal deserialization failed during forwarding. This indicates an internal error on the broker cluster security setup.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class SnapshotNotFoundError(BrokerResponseError):
|
||||
errno = 98
|
||||
message = 'SNAPSHOT_NOT_FOUND'
|
||||
description = 'Requested snapshot was not found.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class PositionOutOfRangeError(BrokerResponseError):
|
||||
errno = 99
|
||||
message = 'POSITION_OUT_OF_RANGE'
|
||||
description = 'Requested position is not greater than or equal to zero, and less than the size of the snapshot.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnknownTopicIdError(BrokerResponseError):
|
||||
errno = 100
|
||||
message = 'UNKNOWN_TOPIC_ID'
|
||||
description = 'This server does not host this topic ID.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class DuplicateBrokerRegistrationError(BrokerResponseError):
|
||||
errno = 101
|
||||
message = 'DUPLICATE_BROKER_REGISTRATION'
|
||||
description = 'This broker ID is already in use.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class BrokerIdNotRegisteredError(BrokerResponseError):
|
||||
errno = 102
|
||||
message = 'BROKER_ID_NOT_REGISTERED'
|
||||
description = 'The given broker ID was not registered.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InconsistentTopicIdError(BrokerResponseError):
|
||||
errno = 103
|
||||
message = 'INCONSISTENT_TOPIC_ID'
|
||||
description = 'The log\'s topic ID did not match the topic ID in the request.'
|
||||
retriable = True
|
||||
invalid_metadata = True
|
||||
|
||||
|
||||
class InconsistentClusterIdError(BrokerResponseError):
|
||||
errno = 104
|
||||
message = 'INCONSISTENT_CLUSTER_ID'
|
||||
description = 'The clusterId in the request does not match that found on the server.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class TransactionalIdNotFoundError(BrokerResponseError):
|
||||
errno = 105
|
||||
message = 'TRANSACTIONAL_ID_NOT_FOUND'
|
||||
description = 'The transactionalId could not be found.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class FetchSessionTopicIdError(BrokerResponseError):
|
||||
errno = 106
|
||||
message = 'FETCH_SESSION_TOPIC_ID_ERROR'
|
||||
description = 'The fetch session encountered inconsistent topic ID usage.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class IneligibleReplicaError(BrokerResponseError):
|
||||
errno = 107
|
||||
message = 'INELIGIBLE_REPLICA'
|
||||
description = 'The new ISR contains at least one ineligible replica.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class NewLeaderElectedError(BrokerResponseError):
|
||||
errno = 108
|
||||
message = 'NEW_LEADER_ELECTED'
|
||||
description = 'The AlterPartition request successfully updated the partition state but the leader has changed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class OffsetMovedToTieredStorageError(BrokerResponseError):
|
||||
errno = 109
|
||||
message = 'OFFSET_MOVED_TO_TIERED_STORAGE'
|
||||
description = 'The requested offset is moved to tiered storage.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class FencedMemberEpochError(BrokerResponseError):
|
||||
errno = 110
|
||||
message = 'FENCED_MEMBER_EPOCH'
|
||||
description = 'The member epoch is fenced by the group coordinator. The member must abandon all its partitions and rejoin.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnreleasedInstanceIdError(BrokerResponseError):
|
||||
errno = 111
|
||||
message = 'UNRELEASED_INSTANCE_ID'
|
||||
description = 'The instance ID is still used by another member in the consumer group. That member must leave first.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnsupportedAssignorError(BrokerResponseError):
|
||||
errno = 112
|
||||
message = 'UNSUPPORTED_ASSIGNOR'
|
||||
description = 'The assignor or its version range is not supported by the consumer group.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class StaleMemberEpochError(BrokerResponseError):
|
||||
errno = 113
|
||||
message = 'STALE_MEMBER_EPOCH'
|
||||
description = 'The member epoch is stale. The member must retry after receiving its updated member epoch via the ConsumerGroupHeartbeat API.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class MismatchedEndpointTypeError(BrokerResponseError):
|
||||
errno = 114
|
||||
message = 'MISMATCHED_ENDPOINT_TYPE'
|
||||
description = 'The request was sent to an endpoint of the wrong type.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnsupportedEndpointTypeError(BrokerResponseError):
|
||||
errno = 115
|
||||
message = 'UNSUPPORTED_ENDPOINT_TYPE'
|
||||
description = 'This endpoint type is not supported yet.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnknownControllerIdError(BrokerResponseError):
|
||||
errno = 116
|
||||
message = 'UNKNOWN_CONTROLLER_ID'
|
||||
description = 'This controller ID is not known.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class UnknownSubscriptionIdError(BrokerResponseError):
|
||||
errno = 117
|
||||
message = 'UNKNOWN_SUBSCRIPTION_ID'
|
||||
description = 'Client sent a push telemetry request with an invalid or outdated subscription ID.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class TelemetryTooLargeError(BrokerResponseError):
|
||||
errno = 118
|
||||
message = 'TELEMETRY_TOO_LARGE'
|
||||
description = 'Client sent a push telemetry request larger than the maximum size the broker will accept.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidRegistrationError(BrokerResponseError):
|
||||
errno = 119
|
||||
message = 'INVALID_REGISTRATION'
|
||||
description = 'The controller has considered the broker registration to be invalid.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class TransactionAbortableError(BrokerResponseError):
|
||||
errno = 120
|
||||
message = 'TRANSACTION_ABORTABLE'
|
||||
description = 'The server encountered an error with the transaction. The client can abort the transaction to continue using this transactional ID.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidRecordStateError(BrokerResponseError):
|
||||
errno = 121
|
||||
message = 'INVALID_RECORD_STATE'
|
||||
description = 'The record state is invalid. The acknowledgement of delivery could not be completed.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class ShareSessionNotFoundError(BrokerResponseError):
|
||||
errno = 122
|
||||
message = 'SHARE_SESSION_NOT_FOUND'
|
||||
description = 'The share session was not found.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class InvalidShareSessionEpochError(BrokerResponseError):
|
||||
errno = 123
|
||||
message = 'INVALID_SHARE_SESSION_EPOCH'
|
||||
description = 'The share session epoch is invalid.'
|
||||
retriable = True
|
||||
|
||||
|
||||
class FencedStateEpochError(BrokerResponseError):
|
||||
errno = 124
|
||||
message = 'FENCED_STATE_EPOCH'
|
||||
description = 'The share coordinator rejected the request because the share-group state epoch did not match.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class InvalidVoterKeyError(BrokerResponseError):
|
||||
errno = 125
|
||||
message = 'INVALID_VOTER_KEY'
|
||||
description = 'The voter key doesn\'t match the receiving replica\'s key.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class DuplicateVoterError(BrokerResponseError):
|
||||
errno = 126
|
||||
message = 'DUPLICATE_VOTER'
|
||||
description = 'The voter is already part of the set of voters.'
|
||||
retriable = False
|
||||
|
||||
|
||||
class VoterNotFoundError(BrokerResponseError):
|
||||
errno = 127
|
||||
message = 'VOTER_NOT_FOUND'
|
||||
description = 'The voter is not part of the set of voters.'
|
||||
retriable = False
|
||||
class AsyncProducerQueueFull(KafkaError):
|
||||
def __init__(self, failed_msgs, *args):
|
||||
super(AsyncProducerQueueFull, self).__init__(*args)
|
||||
self.failed_msgs = failed_msgs
|
||||
|
||||
|
||||
def _iter_broker_errors():
|
||||
@@ -1055,12 +512,27 @@ kafka_errors = dict([(x.errno, x) for x in _iter_broker_errors()])


def for_code(error_code):
    if error_code in kafka_errors:
        return kafka_errors[error_code]
    else:
        # The broker error code was not found in our list. This can happen when connecting
        # to a newer broker (with new error codes), or simply because our error list is
        # not complete.
        #
        # To avoid dropping the error code, create a dynamic error class w/ errno override.
        return type('UnrecognizedBrokerError', (UnknownError,), {'errno': error_code})
    return kafka_errors.get(error_code, UnknownError)


def check_error(response):
    if isinstance(response, Exception):
        raise response
    if response.error:
        error_class = kafka_errors.get(response.error, UnknownError)
        raise error_class(response)


RETRY_BACKOFF_ERROR_TYPES = (
    KafkaUnavailableError, LeaderNotAvailableError,
    KafkaConnectionError, FailedPayloadsError
)


RETRY_REFRESH_ERROR_TYPES = (
    NotLeaderForPartitionError, UnknownTopicOrPartitionError,
    LeaderNotAvailableError, KafkaConnectionError
)


RETRY_ERROR_TYPES = RETRY_BACKOFF_ERROR_TYPES + RETRY_REFRESH_ERROR_TYPES

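To illustrate the lookup above, a small sketch of how a broker error code maps back to an exception class. This is not part of the commit; 9999 is a made-up code that exists in neither version's kafka_errors dict, and the printed class names differ between the two library versions shown in this diff.

import kafka.errors as Errors

cls = Errors.for_code(16)
print(cls.__name__, cls.errno, cls.retriable)

# The newer implementation above returns a dynamically created UnrecognizedBrokerError
# subclass carrying errno=9999; the older implementation simply falls back to UnknownError.
unknown = Errors.for_code(9999)
print(unknown.__name__, unknown.errno)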
@@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import threading
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,7 +15,6 @@ class Future(object):
|
||||
self.exception = None
|
||||
self._callbacks = []
|
||||
self._errbacks = []
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def succeeded(self):
|
||||
return self.is_done and not bool(self.exception)
|
||||
@@ -32,46 +30,37 @@ class Future(object):
|
||||
|
||||
def success(self, value):
|
||||
assert not self.is_done, 'Future is already complete'
|
||||
with self._lock:
|
||||
self.value = value
|
||||
self.is_done = True
|
||||
self.value = value
|
||||
self.is_done = True
|
||||
if self._callbacks:
|
||||
self._call_backs('callback', self._callbacks, self.value)
|
||||
return self
|
||||
|
||||
def failure(self, e):
|
||||
assert not self.is_done, 'Future is already complete'
|
||||
exception = e if type(e) is not type else e()
|
||||
assert isinstance(exception, BaseException), (
|
||||
self.exception = e if type(e) is not type else e()
|
||||
assert isinstance(self.exception, BaseException), (
|
||||
'future failed without an exception')
|
||||
with self._lock:
|
||||
self.exception = exception
|
||||
self.is_done = True
|
||||
self.is_done = True
|
||||
self._call_backs('errback', self._errbacks, self.exception)
|
||||
return self
|
||||
|
||||
def add_callback(self, f, *args, **kwargs):
|
||||
if args or kwargs:
|
||||
f = functools.partial(f, *args, **kwargs)
|
||||
with self._lock:
|
||||
if not self.is_done:
|
||||
self._callbacks.append(f)
|
||||
elif self.succeeded():
|
||||
self._lock.release()
|
||||
self._call_backs('callback', [f], self.value)
|
||||
self._lock.acquire()
|
||||
if self.is_done and not self.exception:
|
||||
self._call_backs('callback', [f], self.value)
|
||||
else:
|
||||
self._callbacks.append(f)
|
||||
return self
|
||||
|
||||
def add_errback(self, f, *args, **kwargs):
|
||||
if args or kwargs:
|
||||
f = functools.partial(f, *args, **kwargs)
|
||||
with self._lock:
|
||||
if not self.is_done:
|
||||
self._errbacks.append(f)
|
||||
elif self.failed():
|
||||
self._lock.release()
|
||||
self._call_backs('errback', [f], self.exception)
|
||||
self._lock.acquire()
|
||||
if self.is_done and self.exception:
|
||||
self._call_backs('errback', [f], self.exception)
|
||||
else:
|
||||
self._errbacks.append(f)
|
||||
return self
|
||||
|
||||
def add_both(self, f, *args, **kwargs):
|
||||
|
||||
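A brief sketch of how the Future shown in the hunk above is typically consumed, whether callbacks are attached before or after completion. The handler names are made up; add_callback and add_errback return self, so calls chain, as the diffed code indicates.

from kafka.future import Future

def on_ok(value):
    print('succeeded with', value)

def on_err(exc):
    print('failed with', exc)

f = Future()
f.add_callback(on_ok).add_errback(on_err)   # attach handlers before completion
f.success(42)                               # fires on_ok; a callback added after this fires immediately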
@@ -3,16 +3,16 @@ from __future__ import absolute_import
|
||||
import abc
|
||||
|
||||
from kafka.metrics.stat import AbstractStat
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractCompoundStat(AbstractStat):
|
||||
"""
|
||||
A compound stat is a stat where a single measurement and associated
|
||||
data structure feeds many metrics. This is the example for a
|
||||
histogram which has many associated percentiles.
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
def stats(self):
|
||||
"""
|
||||
Return list of NamedMeasurable
|
||||
@@ -21,8 +21,6 @@ class AbstractCompoundStat(AbstractStat):
|
||||
|
||||
|
||||
class NamedMeasurable(object):
|
||||
__slots__ = ('_name', '_stat')
|
||||
|
||||
def __init__(self, metric_name, measurable_stat):
|
||||
self._name = metric_name
|
||||
self._stat = measurable_stat
|
||||
|
||||
@@ -4,8 +4,6 @@ import time
|
||||
|
||||
|
||||
class KafkaMetric(object):
|
||||
__slots__ = ('_metric_name', '_measurable', '_config')
|
||||
|
||||
# NOTE java constructor takes a lock instance
|
||||
def __init__(self, metric_name, measurable, config):
|
||||
if not metric_name:
|
||||
@@ -35,4 +33,4 @@ class KafkaMetric(object):
|
||||
def value(self, time_ms=None):
|
||||
if time_ms is None:
|
||||
time_ms = time.time() * 1000
|
||||
return self._measurable.measure(self._config, time_ms)
|
||||
return self.measurable.measure(self.config, time_ms)
|
||||
|
||||
@@ -4,10 +4,8 @@ import abc
|
||||
|
||||
from kafka.metrics.measurable import AbstractMeasurable
|
||||
from kafka.metrics.stat import AbstractStat
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractMeasurableStat(AbstractStat, AbstractMeasurable):
|
||||
"""
|
||||
An AbstractMeasurableStat is an AbstractStat that is also
|
||||
@@ -15,3 +13,4 @@ class AbstractMeasurableStat(AbstractStat, AbstractMeasurable):
|
||||
This is the interface used for most of the simple statistics such
|
||||
as Avg, Max, Count, etc.
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@@ -5,8 +5,6 @@ import sys
|
||||
|
||||
class MetricConfig(object):
|
||||
"""Configuration values for metrics"""
|
||||
__slots__ = ('quota', '_samples', 'event_window', 'time_window_ms', 'tags')
|
||||
|
||||
def __init__(self, quota=None, samples=2, event_window=sys.maxsize,
|
||||
time_window_ms=30 * 1000, tags=None):
|
||||
"""
|
||||
|
||||
@@ -38,7 +38,6 @@ class MetricName(object):
|
||||
# as messages are sent we record the sizes
|
||||
sensor.record(message_size)
|
||||
"""
|
||||
__slots__ = ('_name', '_group', '_description', '_tags', '_hash')
|
||||
|
||||
def __init__(self, name, group, description=None, tags=None):
|
||||
"""
|
||||
@@ -94,7 +93,7 @@ class MetricName(object):
|
||||
return True
|
||||
if other is None:
|
||||
return False
|
||||
return (isinstance(self, type(other)) and
|
||||
return (type(self) == type(other) and
|
||||
self.group == other.group and
|
||||
self.name == other.name and
|
||||
self.tags == other.tags)
|
||||
|
||||
@@ -55,11 +55,10 @@ class Metrics(object):
|
||||
self._reporters = reporters or []
|
||||
for reporter in self._reporters:
|
||||
reporter.init([])
|
||||
self._closed = False
|
||||
|
||||
if enable_expiration:
|
||||
def expire_loop():
|
||||
while not self._closed:
|
||||
while True:
|
||||
# delay 30 seconds
|
||||
time.sleep(30)
|
||||
self.ExpireSensorTask.run(self)
|
||||
@@ -260,4 +259,3 @@ class Metrics(object):
|
||||
reporter.close()
|
||||
|
||||
self._metrics.clear()
|
||||
self._closed = True
|
||||
|
||||
@@ -2,15 +2,14 @@ from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractMetricsReporter(object):
|
||||
"""
|
||||
An abstract class to allow things to listen as new metrics
|
||||
are created so they can be reported.
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def init(self, metrics):
|
||||
"""
|
||||
|
||||
@@ -3,8 +3,6 @@ from __future__ import absolute_import
|
||||
|
||||
class Quota(object):
|
||||
"""An upper or lower bound for metrics"""
|
||||
__slots__ = ('_bound', '_upper')
|
||||
|
||||
def __init__(self, bound, is_upper):
|
||||
self._bound = bound
|
||||
self._upper = is_upper
|
||||
@@ -36,7 +34,7 @@ class Quota(object):
|
||||
def __eq__(self, other):
|
||||
if self is other:
|
||||
return True
|
||||
return (isinstance(self, type(other)) and
|
||||
return (type(self) == type(other) and
|
||||
self.bound == other.bound and
|
||||
self.is_upper_bound() == other.is_upper_bound())
|
||||
|
||||
|
||||
@@ -2,15 +2,14 @@ from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractStat(object):
|
||||
"""
|
||||
An AbstractStat is a quantity such as average, max, etc that is computed
|
||||
off the stream of updates to a sensor
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def record(self, config, value, time_ms):
|
||||
"""
|
||||
|
||||
@@ -7,8 +7,6 @@ class Avg(AbstractSampledStat):
|
||||
"""
|
||||
An AbstractSampledStat that maintains a simple average over its samples.
|
||||
"""
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
|
||||
def __init__(self):
|
||||
super(Avg, self).__init__(0.0)
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@ class Count(AbstractSampledStat):
|
||||
"""
|
||||
An AbstractSampledStat that maintains a simple count of what it has seen.
|
||||
"""
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
|
||||
def __init__(self):
|
||||
super(Count, self).__init__(0.0)
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ import math
|
||||
|
||||
|
||||
class Histogram(object):
|
||||
__slots__ = ('_hist', '_count', '_bin_scheme')
|
||||
|
||||
def __init__(self, bin_scheme):
|
||||
self._hist = [0.0] * bin_scheme.bins
|
||||
self._count = 0.0
|
||||
@@ -42,8 +40,6 @@ class Histogram(object):
|
||||
return '{%s}' % ','.join(values)
|
||||
|
||||
class ConstantBinScheme(object):
|
||||
__slots__ = ('_min', '_max', '_bins', '_bucket_width')
|
||||
|
||||
def __init__(self, bins, min_val, max_val):
|
||||
if bins < 2:
|
||||
raise ValueError('Must have at least 2 bins.')
|
||||
@@ -73,8 +69,6 @@ class Histogram(object):
|
||||
return int(((x - self._min) / self._bucket_width) + 1)
|
||||
|
||||
class LinearBinScheme(object):
|
||||
__slots__ = ('_bins', '_max', '_scale')
|
||||
|
||||
def __init__(self, num_bins, max_val):
|
||||
self._bins = num_bins
|
||||
self._max = max_val
|
||||
|
||||
@@ -5,8 +5,6 @@ from kafka.metrics.stats.sampled_stat import AbstractSampledStat
|
||||
|
||||
class Max(AbstractSampledStat):
|
||||
"""An AbstractSampledStat that gives the max over its samples."""
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
|
||||
def __init__(self):
|
||||
super(Max, self).__init__(float('-inf'))
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@ from kafka.metrics.stats.sampled_stat import AbstractSampledStat
|
||||
|
||||
class Min(AbstractSampledStat):
|
||||
"""An AbstractSampledStat that gives the min over its samples."""
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
|
||||
def __init__(self):
|
||||
super(Min, self).__init__(float(sys.maxsize))
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ from __future__ import absolute_import
|
||||
|
||||
|
||||
class Percentile(object):
|
||||
__slots__ = ('_metric_name', '_percentile')
|
||||
|
||||
def __init__(self, metric_name, percentile):
|
||||
self._metric_name = metric_name
|
||||
self._percentile = float(percentile)
|
||||
|
||||
@@ -13,9 +13,6 @@ class BucketSizing(object):
|
||||
|
||||
class Percentiles(AbstractSampledStat, AbstractCompoundStat):
|
||||
"""A compound stat that reports one or more percentiles"""
|
||||
__slots__ = ('_initial_value', '_samples', '_current',
|
||||
'_percentiles', '_buckets', '_bin_scheme')
|
||||
|
||||
def __init__(self, size_in_bytes, bucketing, max_val, min_val=0.0,
|
||||
percentiles=None):
|
||||
super(Percentiles, self).__init__(0.0)
|
||||
@@ -30,7 +27,7 @@ class Percentiles(AbstractSampledStat, AbstractCompoundStat):
|
||||
' to be 0.0.')
|
||||
self.bin_scheme = Histogram.LinearBinScheme(self._buckets, max_val)
|
||||
else:
|
||||
raise ValueError('Unknown bucket type: %s' % (bucketing,))
|
||||
ValueError('Unknown bucket type: %s' % (bucketing,))
|
||||
|
||||
def stats(self):
|
||||
measurables = []
|
||||
|
||||
@@ -37,8 +37,6 @@ class Rate(AbstractMeasurableStat):
|
||||
occurrences (e.g. the count of values measured over the time interval)
|
||||
or other such values.
|
||||
"""
|
||||
__slots__ = ('_stat', '_unit')
|
||||
|
||||
def __init__(self, time_unit=TimeUnit.SECONDS, sampled_stat=None):
|
||||
self._stat = sampled_stat or SampledTotal()
|
||||
self._unit = time_unit
|
||||
@@ -107,7 +105,6 @@ class Rate(AbstractMeasurableStat):
|
||||
|
||||
|
||||
class SampledTotal(AbstractSampledStat):
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
def __init__(self, initial_value=None):
|
||||
if initial_value is not None:
|
||||
raise ValueError('initial_value cannot be set on SampledTotal')
|
||||
|
||||
@@ -3,10 +3,8 @@ from __future__ import absolute_import
|
||||
import abc
|
||||
|
||||
from kafka.metrics.measurable_stat import AbstractMeasurableStat
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractSampledStat(AbstractMeasurableStat):
|
||||
"""
|
||||
An AbstractSampledStat records a single scalar value measured over
|
||||
@@ -22,7 +20,7 @@ class AbstractSampledStat(AbstractMeasurableStat):
|
||||
Subclasses of this class define different statistics measured
|
||||
using this basic pattern.
|
||||
"""
|
||||
__slots__ = ('_initial_value', '_samples', '_current')
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
def __init__(self, initial_value):
|
||||
self._initial_value = initial_value
|
||||
|
||||
@@ -15,10 +15,6 @@ class Sensor(object):
|
||||
the `record(double)` api and would maintain a set
|
||||
of metrics about request sizes such as the average or max.
|
||||
"""
|
||||
__slots__ = ('_lock', '_registry', '_name', '_parents', '_metrics',
|
||||
'_stats', '_config', '_inactive_sensor_expiration_time_ms',
|
||||
'_last_record_time')
|
||||
|
||||
def __init__(self, registry, name, parents, config,
|
||||
inactive_sensor_expiration_time_seconds):
|
||||
if not name:
|
||||
|
||||
@@ -5,8 +5,6 @@ from kafka.metrics.measurable_stat import AbstractMeasurableStat
|
||||
|
||||
class Total(AbstractMeasurableStat):
|
||||
"""An un-windowed cumulative total maintained over all time."""
|
||||
__slots__ = ('_total')
|
||||
|
||||
def __init__(self, value=0.0):
|
||||
self._total = value
|
||||
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.oauth.abstract import AbstractTokenProvider
|
||||
venv/lib/python3.12/site-packages/kafka/oauth/abstract.py (new file, 42 lines)
@@ -0,0 +1,42 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
# This statement is compatible with both Python 2.7 & 3+
|
||||
ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()})
|
||||
|
||||
class AbstractTokenProvider(ABC):
|
||||
"""
|
||||
A Token Provider must be used for the SASL OAuthBearer protocol.
|
||||
|
||||
The implementation should ensure token reuse so that multiple
|
||||
calls at connect time do not create multiple tokens. The implementation
|
||||
should also periodically refresh the token in order to guarantee
|
||||
that each call returns an unexpired token. A timeout error should
|
||||
be returned after a short period of inactivity so that the
|
||||
broker can log debugging info and retry.
|
||||
|
||||
Token Providers MUST implement the token() method
|
||||
"""
|
||||
|
||||
def __init__(self, **config):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def token(self):
|
||||
"""
|
||||
Returns a (str) ID/Access Token to be sent to the Kafka
|
||||
client.
|
||||
"""
|
||||
pass
|
||||
|
||||
def extensions(self):
|
||||
"""
|
||||
This is an OPTIONAL method that may be implemented.
|
||||
|
||||
Returns a map of key-value pairs that can
|
||||
be sent with the SASL/OAUTHBEARER initial client request. If
|
||||
not implemented, the values are ignored. This feature is only available
|
||||
in Kafka >= 2.1.0.
|
||||
"""
|
||||
return {}
|
||||
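A minimal sketch of a provider built on the interface above. The token string and class name are placeholders, a real implementation should cache and refresh tokens as the docstring requires, and the sasl_oauth_token_provider wiring shown in the comment is an assumption about how kafka-python accepts such a provider.

from kafka.oauth.abstract import AbstractTokenProvider

class StaticTokenProvider(AbstractTokenProvider):
    """Illustrative only: returns a fixed token captured at construction time."""
    def __init__(self, token, **config):
        super(StaticTokenProvider, self).__init__(**config)
        self._token = token

    def token(self):
        return self._token

# Hypothetical wiring for SASL/OAUTHBEARER:
# producer = KafkaProducer(bootstrap_servers='localhost:9093',
#                          security_protocol='SASL_SSL',
#                          sasl_mechanism='OAUTHBEARER',
#                          sasl_oauth_token_provider=StaticTokenProvider('my-token'))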
venv/lib/python3.12/site-packages/kafka/producer/buffer.py (new file, 115 lines)
@@ -0,0 +1,115 @@
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
import collections
|
||||
import io
|
||||
import threading
|
||||
import time
|
||||
|
||||
from kafka.metrics.stats import Rate
|
||||
|
||||
import kafka.errors as Errors
|
||||
|
||||
|
||||
class SimpleBufferPool(object):
|
||||
"""A simple pool of BytesIO objects with a weak memory ceiling."""
|
||||
def __init__(self, memory, poolable_size, metrics=None, metric_group_prefix='producer-metrics'):
|
||||
"""Create a new buffer pool.
|
||||
|
||||
Arguments:
|
||||
memory (int): maximum memory that this buffer pool can allocate
|
||||
poolable_size (int): memory size per buffer to cache in the free
|
||||
list rather than deallocating
|
||||
"""
|
||||
self._poolable_size = poolable_size
|
||||
self._lock = threading.RLock()
|
||||
|
||||
buffers = int(memory / poolable_size) if poolable_size else 0
|
||||
self._free = collections.deque([io.BytesIO() for _ in range(buffers)])
|
||||
|
||||
self._waiters = collections.deque()
|
||||
self.wait_time = None
|
||||
if metrics:
|
||||
self.wait_time = metrics.sensor('bufferpool-wait-time')
|
||||
self.wait_time.add(metrics.metric_name(
|
||||
'bufferpool-wait-ratio', metric_group_prefix,
|
||||
'The fraction of time an appender waits for space allocation.'),
|
||||
Rate())
|
||||
|
||||
def allocate(self, size, max_time_to_block_ms):
|
||||
"""
|
||||
Allocate a buffer of the given size. This method blocks if there is not
|
||||
enough memory and the buffer pool is configured with blocking mode.
|
||||
|
||||
Arguments:
|
||||
size (int): The buffer size to allocate in bytes [ignored]
|
||||
max_time_to_block_ms (int): The maximum time in milliseconds to
|
||||
block for buffer memory to be available
|
||||
|
||||
Returns:
|
||||
io.BytesIO
|
||||
"""
|
||||
with self._lock:
|
||||
# check if we have a free buffer of the right size pooled
|
||||
if self._free:
|
||||
return self._free.popleft()
|
||||
|
||||
elif self._poolable_size == 0:
|
||||
return io.BytesIO()
|
||||
|
||||
else:
|
||||
# we are out of buffers and will have to block
|
||||
buf = None
|
||||
more_memory = threading.Condition(self._lock)
|
||||
self._waiters.append(more_memory)
|
||||
# loop over and over until we have a buffer or have reserved
|
||||
# enough memory to allocate one
|
||||
while buf is None:
|
||||
start_wait = time.time()
|
||||
more_memory.wait(max_time_to_block_ms / 1000.0)
|
||||
end_wait = time.time()
|
||||
if self.wait_time:
|
||||
self.wait_time.record(end_wait - start_wait)
|
||||
|
||||
if self._free:
|
||||
buf = self._free.popleft()
|
||||
else:
|
||||
self._waiters.remove(more_memory)
|
||||
raise Errors.KafkaTimeoutError(
|
||||
"Failed to allocate memory within the configured"
|
||||
" max blocking time")
|
||||
|
||||
# remove the condition for this thread to let the next thread
|
||||
# in line start getting memory
|
||||
removed = self._waiters.popleft()
|
||||
assert removed is more_memory, 'Wrong condition'
|
||||
|
||||
# signal any additional waiters if there is more memory left
|
||||
# over for them
|
||||
if self._free and self._waiters:
|
||||
self._waiters[0].notify()
|
||||
|
||||
# unlock and return the buffer
|
||||
return buf
|
||||
|
||||
def deallocate(self, buf):
|
||||
"""
|
||||
Return buffers to the pool. If they are of the poolable size add them
|
||||
to the free list, otherwise just mark the memory as free.
|
||||
|
||||
Arguments:
|
||||
buffer_ (io.BytesIO): The buffer to return
|
||||
"""
|
||||
with self._lock:
|
||||
# BytesIO.truncate here makes the pool somewhat pointless
|
||||
# but we stick with the BufferPool API until migrating to
|
||||
# bytesarray / memoryview. The buffer we return must not
|
||||
# expose any prior data on read().
|
||||
buf.truncate(0)
|
||||
self._free.append(buf)
|
||||
if self._waiters:
|
||||
self._waiters[0].notify()
|
||||
|
||||
def queued(self):
|
||||
"""The number of threads blocked waiting on memory."""
|
||||
with self._lock:
|
||||
return len(self._waiters)
|
||||
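A short sketch of the pool's lifecycle as described by the docstrings above. The sizes are arbitrary and SimpleBufferPool is internal to the producer, so this is purely illustrative.

from kafka.producer.buffer import SimpleBufferPool

pool = SimpleBufferPool(memory=32 * 1024 * 1024, poolable_size=16384)
buf = pool.allocate(size=16384, max_time_to_block_ms=1000)  # size is ignored, per the docstring
buf.write(b'some serialized record batch')
pool.deallocate(buf)   # truncates the buffer and returns it to the free list
print(pool.queued())   # number of threads currently blocked waiting for a buffer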
@@ -38,7 +38,7 @@ class FutureRecordMetadata(Future):
        produce_future.add_errback(self.failure)

    def _produce_success(self, offset_and_timestamp):
        offset, produce_timestamp_ms = offset_and_timestamp
        offset, produce_timestamp_ms, log_start_offset = offset_and_timestamp

        # Unpacking from args tuple is minor speed optimization
        (relative_offset, timestamp_ms, checksum,
@@ -51,7 +51,7 @@ class FutureRecordMetadata(Future):
        if offset != -1 and relative_offset is not None:
            offset += relative_offset
        tp = self._produce_future.topic_partition
        metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms,
        metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, log_start_offset,
                                  checksum, serialized_key_size,
                                  serialized_value_size, serialized_header_size)
        self.success(metadata)
@@ -67,5 +67,5 @@ class FutureRecordMetadata(Future):


RecordMetadata = collections.namedtuple(
    'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp',
    'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', 'log_start_offset',
                       'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size'])

@@ -1,11 +1,11 @@
|
||||
from __future__ import absolute_import, division
|
||||
from __future__ import absolute_import
|
||||
|
||||
import atexit
|
||||
import copy
|
||||
import logging
|
||||
import socket
|
||||
import threading
|
||||
import warnings
|
||||
import time
|
||||
import weakref
|
||||
|
||||
from kafka.vendor import six
|
||||
@@ -18,12 +18,10 @@ from kafka.partitioner.default import DefaultPartitioner
|
||||
from kafka.producer.future import FutureRecordMetadata, FutureProduceResult
|
||||
from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator
|
||||
from kafka.producer.sender import Sender
|
||||
from kafka.producer.transaction_manager import TransactionManager
|
||||
from kafka.record.default_records import DefaultRecordBatchBuilder
|
||||
from kafka.record.legacy_records import LegacyRecordBatchBuilder
|
||||
from kafka.serializer import Serializer
|
||||
from kafka.structs import TopicPartition
|
||||
from kafka.util import Timer, ensure_valid_topic_name
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -36,8 +34,8 @@ class KafkaProducer(object):
|
||||
The producer is thread safe and sharing a single producer instance across
|
||||
threads will generally be faster than having multiple instances.
|
||||
|
||||
The producer consists of a RecordAccumulator which holds records that
|
||||
haven't yet been transmitted to the server, and a Sender background I/O
|
||||
The producer consists of a pool of buffer space that holds records that
|
||||
haven't yet been transmitted to the server as well as a background I/O
|
||||
thread that is responsible for turning these records into requests and
|
||||
transmitting them to the cluster.
|
||||
|
||||
@@ -73,50 +71,14 @@ class KafkaProducer(object):
|
||||
can lead to fewer, more efficient requests when not under maximal load at
|
||||
the cost of a small amount of latency.
|
||||
|
||||
The buffer_memory controls the total amount of memory available to the
|
||||
producer for buffering. If records are sent faster than they can be
|
||||
transmitted to the server then this buffer space will be exhausted. When
|
||||
the buffer space is exhausted additional send calls will block.
|
||||
|
||||
The key_serializer and value_serializer instruct how to turn the key and
|
||||
value objects the user provides into bytes.
|
||||
|
||||
From Kafka 0.11, the KafkaProducer supports two additional modes:
|
||||
the idempotent producer and the transactional producer.
|
||||
The idempotent producer strengthens Kafka's delivery semantics from
|
||||
at least once to exactly once delivery. In particular, producer retries
|
||||
will no longer introduce duplicates. The transactional producer allows an
|
||||
application to send messages to multiple partitions (and topics!)
|
||||
atomically.
|
||||
|
||||
To enable idempotence, the `enable_idempotence` configuration must be set
|
||||
to True. If set, the `retries` config will default to `float('inf')` and
|
||||
the `acks` config will default to 'all'. There are no API changes for the
|
||||
idempotent producer, so existing applications will not need to be modified
|
||||
to take advantage of this feature.
|
||||
|
||||
To take advantage of the idempotent producer, it is imperative to avoid
|
||||
application level re-sends since these cannot be de-duplicated. As such, if
|
||||
an application enables idempotence, it is recommended to leave the
|
||||
`retries` config unset, as it will be defaulted to `float('inf')`.
|
||||
Additionally, if a :meth:`~kafka.KafkaProducer.send` returns an error even
|
||||
with infinite retries (for instance if the message expires in the buffer
|
||||
before being sent), then it is recommended to shut down the producer and
|
||||
check the contents of the last produced message to ensure that it is not
|
||||
duplicated. Finally, the producer can only guarantee idempotence for
|
||||
messages sent within a single session.
|
||||
|
||||
To use the transactional producer and the attendant APIs, you must set the
|
||||
`transactional_id` configuration property. If the `transactional_id` is
|
||||
set, idempotence is automatically enabled along with the producer configs
|
||||
which idempotence depends on. Further, topics which are included in
|
||||
transactions should be configured for durability. In particular, the
|
||||
`replication.factor` should be at least `3`, and the `min.insync.replicas`
|
||||
for these topics should be set to 2. Finally, in order for transactional
|
||||
guarantees to be realized from end-to-end, the consumers must be
|
||||
configured to read only committed messages as well.
|
||||
|
||||
The purpose of the `transactional_id` is to enable transaction recovery
|
||||
across multiple sessions of a single producer instance. It would typically
|
||||
be derived from the shard identifier in a partitioned, stateful,
|
||||
application. As such, it should be unique to each producer instance running
|
||||
within a partitioned application.
|
||||
|
||||
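A hedged end-to-end sketch of the two modes described above, using only the producer APIs that appear elsewhere in this diff (init_transactions, begin_transaction, commit_transaction, abort_transaction); the broker address, topic names and transactional_id are placeholders.

from kafka import KafkaProducer

# Idempotent producer: a single config flag, no API changes.
idempotent = KafkaProducer(bootstrap_servers='localhost:9092',
                           enable_idempotence=True)  # acks/retries are defaulted appropriately
idempotent.send('events', b'no duplicates within this session')
idempotent.flush()

# Transactional producer: setting transactional_id implies idempotence.
txn = KafkaProducer(bootstrap_servers='localhost:9092',
                    transactional_id='my-app-shard-0')
txn.init_transactions()
txn.begin_transaction()
try:
    txn.send('events', b'atomic with the message below')
    txn.send('audit', b'atomic with the message above')
    txn.commit_transaction()
except Exception:
    txn.abort_transaction()
    raise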
Keyword Arguments:
|
||||
bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
|
||||
strings) that the producer should contact to bootstrap initial
|
||||
@@ -134,28 +96,6 @@ class KafkaProducer(object):
|
||||
value_serializer (callable): used to convert user-supplied message
|
||||
values to bytes. If not None, called as f(value), should return
|
||||
bytes. Default: None.
|
||||
enable_idempotence (bool): When set to True, the producer will ensure
|
||||
that exactly one copy of each message is written in the stream.
|
||||
If False, producer retries due to broker failures, etc., may write
|
||||
duplicates of the retried message in the stream. Default: False.
|
||||
|
||||
Note that enabling idempotence requires
|
||||
`max_in_flight_requests_per_connection` to be set to 1 and `retries`
|
||||
cannot be zero. Additionally, `acks` must be set to 'all'. If these
|
||||
values are left at their defaults, the producer will override the
|
||||
defaults to be suitable. If the values are set to something
|
||||
incompatible with the idempotent producer, a KafkaConfigurationError
|
||||
will be raised.
|
||||
delivery_timeout_ms (float): An upper bound on the time to report success
|
||||
or failure after producer.send() returns. This limits the total time
|
||||
that a record will be delayed prior to sending, the time to await
|
||||
acknowledgement from the broker (if expected), and the time allowed
|
||||
for retriable send failures. The producer may report failure to send
|
||||
a record earlier than this config if either an unrecoverable error is
|
||||
encountered, the retries have been exhausted, or the record is added
|
||||
to a batch which reached an earlier delivery expiration deadline.
|
||||
The value of this config should be greater than or equal to the
|
||||
sum of (request_timeout_ms + linger_ms). Default: 120000.
|
||||
acks (0, 1, 'all'): The number of acknowledgments the producer requires
|
||||
the leader to have received before considering a request complete.
|
||||
This controls the durability of records that are sent. The
|
||||
@@ -183,7 +123,7 @@ class KafkaProducer(object):
|
||||
Compression is of full batches of data, so the efficacy of batching
|
||||
will also impact the compression ratio (more batching means better
|
||||
compression). Default: None.
|
||||
retries (numeric): Setting a value greater than zero will cause the client
|
||||
retries (int): Setting a value greater than zero will cause the client
|
||||
to resend any record whose send fails with a potentially transient
|
||||
error. Note that this retry is no different than if the client
|
||||
resent the record upon receiving the error. Allowing retries
|
||||
@@ -191,12 +131,8 @@ class KafkaProducer(object):
|
||||
potentially change the ordering of records because if two batches
|
||||
are sent to a single partition, and the first fails and is retried
|
||||
but the second succeeds, then the records in the second batch may
|
||||
appear first. Note additionally that produce requests will be
|
||||
failed before the number of retries has been exhausted if the timeout
|
||||
configured by delivery_timeout_ms expires first before successful
|
||||
acknowledgement. Users should generally prefer to leave this config
|
||||
unset and instead use delivery_timeout_ms to control retry behavior.
|
||||
Default: float('inf') (infinite)
|
||||
appear first.
|
||||
Default: 0.
|
||||
batch_size (int): Requests sent to brokers will contain multiple
|
||||
batches, one for each partition with data available to be sent.
|
||||
A small batch size will make batching less common and may reduce
|
||||
@@ -229,6 +165,12 @@ class KafkaProducer(object):
|
||||
messages with the same key are assigned to the same partition.
|
||||
When a key is None, the message is delivered to a random partition
|
||||
(filtered to partitions with available leaders only, if possible).
|
||||
buffer_memory (int): The total bytes of memory the producer should use
|
||||
to buffer records waiting to be sent to the server. If records are
|
||||
sent faster than they can be delivered to the server the producer
|
||||
will block up to max_block_ms, raising an exception on timeout.
|
||||
In the current implementation, this setting is an approximation.
|
||||
Default: 33554432 (32MB)
|
||||
connections_max_idle_ms: Close idle connections after the number of
|
||||
milliseconds specified by this config. The broker closes idle
|
||||
connections after connections.max.idle.ms, so this avoids hitting
|
||||
@@ -246,9 +188,6 @@ class KafkaProducer(object):
|
||||
This setting will limit the number of record batches the producer
|
||||
will send in a single request to avoid sending huge requests.
|
||||
Default: 1048576.
|
||||
allow_auto_create_topics (bool): Enable/disable auto topic creation
|
||||
on metadata request. Only available with api_version >= (0, 11).
|
||||
Default: True
|
||||
metadata_max_age_ms (int): The period of time in milliseconds after
|
||||
which we force a refresh of metadata even if we haven't seen any
|
||||
partition leadership changes to proactively discover any new
|
||||
@@ -277,7 +216,7 @@ class KafkaProducer(object):
|
||||
reconnection attempts will continue periodically with this fixed
|
||||
rate. To avoid connection storms, a randomization factor of 0.2
|
||||
will be applied to the backoff resulting in a random range between
|
||||
20% below and 20% above the computed value. Default: 30000.
|
||||
20% below and 20% above the computed value. Default: 1000.
|
||||
max_in_flight_requests_per_connection (int): Requests are pipelined
|
||||
to kafka brokers up to this number of maximum requests per
|
||||
broker connection. Note that if this setting is set to be greater
|
||||
@@ -294,7 +233,7 @@ class KafkaProducer(object):
|
||||
should verify that the certificate matches the brokers hostname.
|
||||
default: true.
|
||||
ssl_cafile (str): optional filename of ca file to use in certificate
|
||||
verification. default: none.
|
||||
veriication. default: none.
|
||||
ssl_certfile (str): optional filename of file in pem format containing
|
||||
the client certificate, as well as any ca certificates needed to
|
||||
establish the certificate's authenticity. default: none.
|
||||
@@ -313,28 +252,14 @@ class KafkaProducer(object):
|
||||
or other configuration forbids use of all the specified ciphers),
|
||||
an ssl.SSLError will be raised. See ssl.SSLContext.set_ciphers
|
||||
api_version (tuple): Specify which Kafka API version to use. If set to
|
||||
None, the client will attempt to determine the broker version via
|
||||
ApiVersionsRequest API or, for brokers earlier than 0.10, probing
|
||||
various known APIs. Dynamic version checking is performed eagerly
|
||||
during __init__ and can raise NoBrokersAvailableError if no connection
|
||||
was made before timeout (see api_version_auto_timeout_ms below).
|
||||
Different versions enable different functionality.
|
||||
|
||||
Examples:
|
||||
(3, 9) most recent broker release, enable all supported features
|
||||
(0, 11) enables message format v2 (internal)
|
||||
(0, 10, 0) enables sasl authentication and message format v1
|
||||
(0, 8, 0) enables basic functionality only
|
||||
|
||||
Default: None
|
||||
None, the client will attempt to infer the broker version by probing
|
||||
various APIs. Example: (0, 10, 2). Default: None
|
||||
api_version_auto_timeout_ms (int): number of milliseconds to throw a
|
||||
timeout exception from the constructor when checking the broker
|
||||
api version. Only applies if api_version set to None.
|
||||
Default: 2000
|
||||
metric_reporters (list): A list of classes to use as metrics reporters.
|
||||
Implementing the AbstractMetricsReporter interface allows plugging
|
||||
in classes that will be notified of new metric creation. Default: []
|
||||
metrics_enabled (bool): Whether to track metrics on this instance. Default True.
|
||||
metrics_num_samples (int): The number of samples maintained to compute
|
||||
metrics. Default: 2
|
||||
metrics_sample_window_ms (int): The maximum age in milliseconds of
|
||||
@@ -349,42 +274,33 @@ class KafkaProducer(object):
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication.
|
||||
Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms.
|
||||
sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with
|
||||
sasl mechanism handshake. If provided, sasl_kerberos_service_name and
|
||||
sasl_kerberos_domain name are ignored. Default: None.
|
||||
sasl_kerberos_service_name (str): Service name to include in GSSAPI
|
||||
sasl mechanism handshake. Default: 'kafka'
|
||||
sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI
|
||||
sasl mechanism handshake. Default: one of bootstrap servers
|
||||
sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer
|
||||
token provider instance. Default: None
|
||||
socks5_proxy (str): Socks5 proxy URL. Default: None
|
||||
kafka_client (callable): Custom class / callable for creating KafkaClient instances
|
||||
sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider
|
||||
instance. (See kafka.oauth.abstract). Default: None
|
||||
|
||||
Note:
|
||||
Configuration parameters are described in more detail at
|
||||
https://kafka.apache.org/0100/documentation/#producerconfigs
|
||||
https://kafka.apache.org/0100/configuration.html#producerconfigs
|
||||
"""
|
||||
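A hedged configuration sketch tying together several of the options documented above; broker addresses, the topic name and the particular values are illustrative, not recommendations.

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers=['broker1:9092', 'broker2:9092'],
    acks='all',                                 # wait for the full ISR before acking
    compression_type='gzip',                    # whole batches are compressed together
    linger_ms=5,                                # trade a little latency for bigger batches
    batch_size=32 * 1024,
    max_in_flight_requests_per_connection=1,    # preserves per-partition ordering on retry
    value_serializer=lambda v: v.encode('utf-8'),
)
producer.send('my-topic', 'hello')
producer.flush()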
DEFAULT_CONFIG = {
|
||||
'bootstrap_servers': 'localhost',
|
||||
'client_id': None,
|
||||
'key_serializer': None,
|
||||
'value_serializer': None,
|
||||
'enable_idempotence': False,
|
||||
'transactional_id': None,
|
||||
'transaction_timeout_ms': 60000,
|
||||
'delivery_timeout_ms': 120000,
|
||||
'acks': 1,
|
||||
'bootstrap_topics_filter': set(),
|
||||
'compression_type': None,
|
||||
'retries': float('inf'),
|
||||
'retries': 0,
|
||||
'batch_size': 16384,
|
||||
'linger_ms': 0,
|
||||
'partitioner': DefaultPartitioner(),
|
||||
'buffer_memory': 33554432,
|
||||
'connections_max_idle_ms': 9 * 60 * 1000,
|
||||
'max_block_ms': 60000,
|
||||
'max_request_size': 1048576,
|
||||
'allow_auto_create_topics': True,
|
||||
'metadata_max_age_ms': 300000,
|
||||
'retry_backoff_ms': 100,
|
||||
'request_timeout_ms': 30000,
|
||||
@@ -394,7 +310,7 @@ class KafkaProducer(object):
|
||||
'sock_chunk_bytes': 4096, # undocumented experimental option
|
||||
'sock_chunk_buffer_count': 1000, # undocumented experimental option
|
||||
'reconnect_backoff_ms': 50,
|
||||
'reconnect_backoff_max_ms': 30000,
|
||||
'reconnect_backoff_max_ms': 1000,
|
||||
'max_in_flight_requests_per_connection': 5,
|
||||
'security_protocol': 'PLAINTEXT',
|
||||
'ssl_context': None,
|
||||
@@ -408,23 +324,17 @@ class KafkaProducer(object):
|
||||
'api_version': None,
|
||||
'api_version_auto_timeout_ms': 2000,
|
||||
'metric_reporters': [],
|
||||
'metrics_enabled': True,
|
||||
'metrics_num_samples': 2,
|
||||
'metrics_sample_window_ms': 30000,
|
||||
'selector': selectors.DefaultSelector,
|
||||
'sasl_mechanism': None,
|
||||
'sasl_plain_username': None,
|
||||
'sasl_plain_password': None,
|
||||
'sasl_kerberos_name': None,
|
||||
'sasl_kerberos_service_name': 'kafka',
|
||||
'sasl_kerberos_domain_name': None,
|
||||
'sasl_oauth_token_provider': None,
|
||||
'socks5_proxy': None,
|
||||
'kafka_client': KafkaClient,
|
||||
'sasl_oauth_token_provider': None
|
||||
}
|
||||
|
||||
DEPRECATED_CONFIGS = ('buffer_memory',)
|
||||
|
||||
_COMPRESSORS = {
|
||||
'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP),
|
||||
'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY),
|
||||
@@ -434,17 +344,12 @@ class KafkaProducer(object):
|
||||
}
|
||||
|
||||
def __init__(self, **configs):
|
||||
log.debug("Starting the Kafka producer") # trace
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
user_provided_configs = set(configs.keys())
|
||||
for key in self.config:
|
||||
if key in configs:
|
||||
self.config[key] = configs.pop(key)
|
||||
|
||||
for key in self.DEPRECATED_CONFIGS:
|
||||
if key in configs:
|
||||
configs.pop(key)
|
||||
warnings.warn('Deprecated Producer config: %s' % (key,), DeprecationWarning)
|
||||
|
||||
# Only check for extra config keys in top-level class
|
||||
assert not configs, 'Unrecognized configs: %s' % (configs,)
|
||||
|
||||
@@ -462,35 +367,30 @@ class KafkaProducer(object):
|
||||
self.config['api_version'] = None
|
||||
else:
|
||||
self.config['api_version'] = tuple(map(int, deprecated.split('.')))
|
||||
log.warning('%s: use api_version=%s [tuple] -- "%s" as str is deprecated',
|
||||
str(self), str(self.config['api_version']), deprecated)
|
||||
|
||||
log.debug("%s: Starting Kafka producer", str(self))
|
||||
log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated',
|
||||
str(self.config['api_version']), deprecated)
|
||||
|
||||
# Configure metrics
|
||||
if self.config['metrics_enabled']:
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
else:
|
||||
self._metrics = None
|
||||
metrics_tags = {'client-id': self.config['client_id']}
|
||||
metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
|
||||
time_window_ms=self.config['metrics_sample_window_ms'],
|
||||
tags=metrics_tags)
|
||||
reporters = [reporter() for reporter in self.config['metric_reporters']]
|
||||
self._metrics = Metrics(metric_config, reporters)
|
||||
|
||||
client = self.config['kafka_client'](
|
||||
metrics=self._metrics, metric_group_prefix='producer',
|
||||
wakeup_timeout_ms=self.config['max_block_ms'],
|
||||
**self.config)
|
||||
client = KafkaClient(metrics=self._metrics, metric_group_prefix='producer',
|
||||
wakeup_timeout_ms=self.config['max_block_ms'],
|
||||
**self.config)
|
||||
|
||||
# Get auto-discovered / normalized version from client
|
||||
self.config['api_version'] = client.config['api_version']
|
||||
# Get auto-discovered version from client if necessary
|
||||
if self.config['api_version'] is None:
|
||||
self.config['api_version'] = client.config['api_version']
|
||||
|
||||
if self.config['compression_type'] == 'lz4':
|
||||
assert self.config['api_version'] >= (0, 8, 2), 'LZ4 Requires >= Kafka 0.8.2 Brokers'
|
||||
|
||||
if self.config['compression_type'] == 'zstd':
|
||||
assert self.config['api_version'] >= (2, 1), 'Zstd Requires >= Kafka 2.1 Brokers'
|
||||
assert self.config['api_version'] >= (2, 1, 0), 'Zstd Requires >= Kafka 2.1.0 Brokers'
|
||||
|
||||
# Check compression_type for library support
|
||||
ct = self.config['compression_type']
|
||||
@@ -501,58 +401,12 @@ class KafkaProducer(object):
|
||||
assert checker(), "Libraries for {} compression codec not found".format(ct)
|
||||
self.config['compression_attrs'] = compression_attrs
|
||||
|
||||
message_version = self._max_usable_produce_magic()
|
||||
self._accumulator = RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config)
|
||||
self._metadata = client.cluster
|
||||
self._transaction_manager = None
|
||||
self._init_transactions_result = None
|
||||
if 'enable_idempotence' in user_provided_configs and not self.config['enable_idempotence'] and self.config['transactional_id']:
|
||||
raise Errors.KafkaConfigurationError("Cannot set transactional_id without enable_idempotence.")
|
||||
|
||||
if self.config['transactional_id']:
|
||||
self.config['enable_idempotence'] = True
|
||||
|
||||
if self.config['enable_idempotence']:
|
||||
assert self.config['api_version'] >= (0, 11), "Transactional/Idempotent producer requires >= Kafka 0.11 Brokers"
|
||||
|
||||
self._transaction_manager = TransactionManager(
|
||||
transactional_id=self.config['transactional_id'],
|
||||
transaction_timeout_ms=self.config['transaction_timeout_ms'],
|
||||
retry_backoff_ms=self.config['retry_backoff_ms'],
|
||||
api_version=self.config['api_version'],
|
||||
metadata=self._metadata,
|
||||
)
|
||||
if self._transaction_manager.is_transactional():
|
||||
log.info("%s: Instantiated a transactional producer.", str(self))
|
||||
else:
|
||||
log.info("%s: Instantiated an idempotent producer.", str(self))
|
||||
|
||||
if self.config['retries'] == 0:
|
||||
raise Errors.KafkaConfigurationError("Must set 'retries' to non-zero when using the idempotent producer.")
|
||||
|
||||
if 'max_in_flight_requests_per_connection' not in user_provided_configs:
|
||||
log.info("%s: Overriding the default 'max_in_flight_requests_per_connection' to 1 since idempontence is enabled.", str(self))
|
||||
self.config['max_in_flight_requests_per_connection'] = 1
|
||||
elif self.config['max_in_flight_requests_per_connection'] != 1:
|
||||
raise Errors.KafkaConfigurationError("Must set 'max_in_flight_requests_per_connection' to 1 in order"
|
||||
" to use the idempotent producer."
|
||||
" Otherwise we cannot guarantee idempotence.")
|
||||
|
||||
if 'acks' not in user_provided_configs:
|
||||
log.info("%s: Overriding the default 'acks' config to 'all' since idempotence is enabled", str(self))
|
||||
self.config['acks'] = -1
|
||||
elif self.config['acks'] != -1:
|
||||
raise Errors.KafkaConfigurationError("Must set 'acks' config to 'all' in order to use the idempotent"
|
||||
" producer. Otherwise we cannot guarantee idempotence")
|
||||
|
||||
message_version = self.max_usable_produce_magic(self.config['api_version'])
|
||||
self._accumulator = RecordAccumulator(
|
||||
transaction_manager=self._transaction_manager,
|
||||
message_version=message_version,
|
||||
**self.config)
|
||||
guarantee_message_order = bool(self.config['max_in_flight_requests_per_connection'] == 1)
|
||||
self._sender = Sender(client, self._metadata,
|
||||
self._accumulator,
|
||||
metrics=self._metrics,
|
||||
transaction_manager=self._transaction_manager,
|
||||
self._accumulator, self._metrics,
|
||||
guarantee_message_order=guarantee_message_order,
|
||||
**self.config)
|
||||
self._sender.daemon = True
|
||||
@@ -561,7 +415,7 @@ class KafkaProducer(object):
|
||||
|
||||
self._cleanup = self._cleanup_factory()
|
||||
atexit.register(self._cleanup)
|
||||
log.debug("%s: Kafka producer started", str(self))
|
||||
log.debug("Kafka producer started")
|
||||
|
||||
def bootstrap_connected(self):
|
||||
"""Return True if the bootstrap is connected."""
|
||||
@@ -572,7 +426,7 @@ class KafkaProducer(object):
|
||||
_self = weakref.proxy(self)
|
||||
def wrapper():
|
||||
try:
|
||||
_self.close(timeout=0, null_logger=True)
|
||||
_self.close(timeout=0)
|
||||
except (ReferenceError, AttributeError):
|
||||
pass
|
||||
return wrapper
|
||||
@@ -595,28 +449,28 @@ class KafkaProducer(object):
|
||||
self._cleanup = None
|
||||
|
||||
def __del__(self):
|
||||
self.close(timeout=1, null_logger=True)
|
||||
# Disable logger during destruction to avoid touching dangling references
|
||||
class NullLogger(object):
|
||||
def __getattr__(self, name):
|
||||
return lambda *args: None
|
||||
|
||||
def close(self, timeout=None, null_logger=False):
|
||||
global log
|
||||
log = NullLogger()
|
||||
|
||||
self.close()
|
||||
|
||||
def close(self, timeout=None):
|
||||
"""Close this producer.
|
||||
|
||||
Arguments:
|
||||
timeout (float, optional): timeout in seconds to wait for completion.
|
||||
"""
|
||||
if null_logger:
|
||||
# Disable logger during destruction to avoid touching dangling references
|
||||
class NullLogger(object):
|
||||
def __getattr__(self, name):
|
||||
return lambda *args: None
|
||||
|
||||
global log
|
||||
log = NullLogger()
|
||||
|
||||
# drop our atexit handler now to avoid leaks
|
||||
self._unregister_cleanup()
|
||||
|
||||
if not hasattr(self, '_closed') or self._closed:
|
||||
log.info('%s: Kafka producer closed', str(self))
|
||||
log.info('Kafka producer closed')
|
||||
return
|
||||
if timeout is None:
|
||||
# threading.TIMEOUT_MAX is available in Python3.3+
|
||||
@@ -626,16 +480,15 @@ class KafkaProducer(object):
|
||||
else:
|
||||
assert timeout >= 0
|
||||
|
||||
log.info("%s: Closing the Kafka producer with %s secs timeout.", str(self), timeout)
|
||||
self.flush(timeout)
|
||||
log.info("Closing the Kafka producer with %s secs timeout.", timeout)
|
||||
invoked_from_callback = bool(threading.current_thread() is self._sender)
|
||||
if timeout > 0:
|
||||
if invoked_from_callback:
|
||||
log.warning("%s: Overriding close timeout %s secs to 0 in order to"
|
||||
log.warning("Overriding close timeout %s secs to 0 in order to"
|
||||
" prevent useless blocking due to self-join. This"
|
||||
" means you have incorrectly invoked close with a"
|
||||
" non-zero timeout from the producer call-back.",
|
||||
str(self), timeout)
|
||||
timeout)
|
||||
else:
|
||||
# Try to close gracefully.
|
||||
if self._sender is not None:
|
||||
@@ -643,13 +496,12 @@ class KafkaProducer(object):
|
||||
self._sender.join(timeout)
|
||||
|
||||
if self._sender is not None and self._sender.is_alive():
|
||||
log.info("%s: Proceeding to force close the producer since pending"
|
||||
log.info("Proceeding to force close the producer since pending"
|
||||
" requests could not be completed within timeout %s.",
|
||||
str(self), timeout)
|
||||
timeout)
|
||||
self._sender.force_close()
|
||||
|
||||
if self._metrics:
|
||||
self._metrics.close()
|
||||
self._metrics.close()
|
||||
try:
|
||||
self.config['key_serializer'].close()
|
||||
except AttributeError:
|
||||
@@ -659,23 +511,23 @@ class KafkaProducer(object):
|
||||
except AttributeError:
|
||||
pass
|
||||
self._closed = True
|
||||
log.debug("%s: The Kafka producer has closed.", str(self))
|
||||
log.debug("The Kafka producer has closed.")
|
||||
|
||||
def partitions_for(self, topic):
|
||||
"""Returns set of all known partitions for the topic."""
|
||||
return self._wait_on_metadata(topic, self.config['max_block_ms'])
|
||||
max_wait = self.config['max_block_ms'] / 1000.0
|
||||
return self._wait_on_metadata(topic, max_wait)
|
||||
|
||||
@classmethod
|
||||
def max_usable_produce_magic(cls, api_version):
|
||||
if api_version >= (0, 11):
|
||||
def _max_usable_produce_magic(self):
|
||||
if self.config['api_version'] >= (0, 11):
|
||||
return 2
|
||||
elif api_version >= (0, 10, 0):
|
||||
elif self.config['api_version'] >= (0, 10):
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
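The version-to-magic mapping above is small enough to restate as a standalone helper for sanity checks; this is an illustrative restatement, not part of the library API.

def produce_magic_for(api_version):
    if api_version >= (0, 11):
        return 2      # message format v2 (required for idempotence/transactions)
    elif api_version >= (0, 10, 0):
        return 1      # v1 adds timestamps
    else:
        return 0      # legacy v0

assert produce_magic_for((2, 1)) == 2
assert produce_magic_for((0, 10, 1)) == 1
assert produce_magic_for((0, 9)) == 0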
def _estimate_size_in_bytes(self, key, value, headers=[]):
|
||||
magic = self.max_usable_produce_magic(self.config['api_version'])
|
||||
magic = self._max_usable_produce_magic()
|
||||
if magic == 2:
|
||||
return DefaultRecordBatchBuilder.estimate_size_in_bytes(
|
||||
key, value, headers)
|
||||
@@ -683,114 +535,6 @@ class KafkaProducer(object):
|
||||
return LegacyRecordBatchBuilder.estimate_size_in_bytes(
|
||||
magic, self.config['compression_type'], key, value)
|
||||
|
||||
def init_transactions(self):
|
||||
"""
|
||||
Needs to be called before any other methods when the transactional.id is set in the configuration.
|
||||
|
||||
This method does the following:
|
||||
1. Ensures any transactions initiated by previous instances of the producer with the same
|
||||
transactional_id are completed. If the previous instance had failed with a transaction in
|
||||
progress, it will be aborted. If the last transaction had begun completion,
|
||||
but not yet finished, this method awaits its completion.
|
||||
2. Gets the internal producer id and epoch, used in all future transactional
|
||||
messages issued by the producer.
|
||||
|
||||
Note that this method will raise KafkaTimeoutError if the transactional state cannot
|
||||
be initialized before expiration of `max_block_ms`.
|
||||
|
||||
Retrying after a KafkaTimeoutError will continue to wait for the prior request to succeed or fail.
|
||||
Retrying after any other exception will start a new initialization attempt.
|
||||
Retrying after a successful initialization will do nothing.
|
||||
|
||||
Raises:
|
||||
IllegalStateError: if no transactional_id has been configured
|
||||
AuthorizationError: fatal error indicating that the configured
|
||||
transactional_id is not authorized.
|
||||
KafkaError: if the producer has encountered a previous fatal error or for any other unexpected error
|
||||
KafkaTimeoutError: if the time taken to initialize the transaction has surpassed `max.block.ms`.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot call init_transactions without setting a transactional_id.")
|
||||
if self._init_transactions_result is None:
|
||||
self._init_transactions_result = self._transaction_manager.initialize_transactions()
|
||||
self._sender.wakeup()
|
||||
|
||||
try:
|
||||
if not self._init_transactions_result.wait(timeout_ms=self.config['max_block_ms']):
|
||||
raise Errors.KafkaTimeoutError("Timeout expired while initializing transactional state in %s ms." % (self.config['max_block_ms'],))
|
||||
finally:
|
||||
if self._init_transactions_result.failed:
|
||||
self._init_transactions_result = None
|
||||
|
||||
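A hedged sketch of the retry semantics described in the docstring above: a KafkaTimeoutError from init_transactions() can simply be retried, since the prior initialization request keeps running. The producer arguments are placeholders.

from kafka import KafkaProducer
from kafka.errors import KafkaTimeoutError

producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         transactional_id='order-processor-0',
                         max_block_ms=10000)
while True:
    try:
        producer.init_transactions()
        break
    except KafkaTimeoutError:
        # Safe to retry: the in-flight initialization request is still pending.
        continue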
def begin_transaction(self):
|
||||
""" Should be called before the start of each new transaction.
|
||||
|
||||
Note that prior to the first invocation of this method,
|
||||
you must invoke `init_transactions()` exactly one time.
|
||||
|
||||
Raises:
    ProducerFencedError: if another producer with the same
        transactional_id is active.
|
||||
"""
|
||||
# Set the transactional bit in the producer.
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions")
|
||||
self._transaction_manager.begin_transaction()
|
||||
|
||||
def send_offsets_to_transaction(self, offsets, consumer_group_id):
|
||||
"""
|
||||
Sends a list of consumed offsets to the consumer group coordinator, and also marks
|
||||
those offsets as part of the current transaction. These offsets will be considered
|
||||
consumed only if the transaction is committed successfully.
|
||||
|
||||
This method should be used when you need to batch consumed and produced messages
|
||||
together, typically in a consume-transform-produce pattern.
|
||||
|
||||
Arguments:
|
||||
offsets ({TopicPartition: OffsetAndMetadata}): map of topic-partition -> offsets to commit
|
||||
as part of current transaction.
|
||||
consumer_group_id (str): Name of consumer group for offsets commit.
|
||||
|
||||
Raises:
|
||||
IllegalStateError: if no transactional_id, or transaction has not been started.
|
||||
ProducerFencedError: fatal error indicating another producer with the same transactional_id is active.
|
||||
UnsupportedVersionError: fatal error indicating the broker does not support transactions (i.e. if < 0.11).
|
||||
UnsupportedForMessageFormatError: fatal error indicating the message format used for the offsets
|
||||
topic on the broker does not support transactions.
|
||||
AuthorizationError: fatal error indicating that the configured transactional_id is not authorized.
|
||||
KafkaError: if the producer has encountered a previous fatal or abortable error, or for any
    other unexpected error
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions")
|
||||
result = self._transaction_manager.send_offsets_to_transaction(offsets, consumer_group_id)
|
||||
self._sender.wakeup()
|
||||
result.wait()
|
||||
|
||||
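A hedged consume-transform-produce sketch matching the description above. Topic, group and transactional ids are placeholders, and the OffsetAndMetadata constructor fields (offset, metadata, leader_epoch) are an assumption about the installed kafka-python version.

from kafka import KafkaConsumer, KafkaProducer
from kafka.structs import TopicPartition, OffsetAndMetadata

consumer = KafkaConsumer('input-topic',
                         bootstrap_servers='localhost:9092',
                         group_id='transformer',
                         enable_auto_commit=False,
                         isolation_level='read_committed')  # assumes consumer-side read_committed support
producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         transactional_id='transformer-0')
producer.init_transactions()

for msg in consumer:
    producer.begin_transaction()
    producer.send('output-topic', msg.value.upper())
    # Commit the consumed offset atomically with the produced record.
    # OffsetAndMetadata field layout assumed: (offset, metadata, leader_epoch).
    offsets = {TopicPartition(msg.topic, msg.partition):
               OffsetAndMetadata(msg.offset + 1, '', -1)}
    producer.send_offsets_to_transaction(offsets, 'transformer')
    producer.commit_transaction()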
def commit_transaction(self):
|
||||
""" Commits the ongoing transaction.
|
||||
|
||||
Raises: ProducerFencedError if another producer with the same
|
||||
transactional_id is active.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot commit transaction since transactions are not enabled")
|
||||
result = self._transaction_manager.begin_commit()
|
||||
self._sender.wakeup()
|
||||
result.wait()
|
||||
|
||||
def abort_transaction(self):
|
||||
""" Aborts the ongoing transaction.
|
||||
|
||||
Raises: ProducerFencedError if another producer with the same
|
||||
transactional_id is active.
|
||||
"""
|
||||
if not self._transaction_manager:
|
||||
raise Errors.IllegalStateError("Cannot abort transaction since transactions are not enabled.")
|
||||
result = self._transaction_manager.begin_abort()
|
||||
self._sender.wakeup()
|
||||
result.wait()
|
||||
|
||||
def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None):
|
||||
"""Publish a message to a topic.
|
||||
|
||||
@@ -823,58 +567,44 @@ class KafkaProducer(object):
|
||||
Raises:
|
||||
KafkaTimeoutError: if unable to fetch topic metadata, or unable
|
||||
to obtain memory buffer prior to configured max_block_ms
|
||||
TypeError: if topic is not a string
|
||||
ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length
|
||||
AssertionError: if KafkaProducer is closed, or key and value are both None
|
||||
"""
|
||||
assert not self._closed, 'KafkaProducer already closed!'
|
||||
assert value is not None or self.config['api_version'] >= (0, 8, 1), (
|
||||
'Null messages require kafka >= 0.8.1')
|
||||
assert not (value is None and key is None), 'Need at least one: key or value'
|
||||
ensure_valid_topic_name(topic)
|
||||
key_bytes = value_bytes = None
|
||||
timer = Timer(self.config['max_block_ms'], "Failed to assign partition for message in max_block_ms.")
|
||||
try:
|
||||
assigned_partition = None
|
||||
while assigned_partition is None and not timer.expired:
|
||||
self._wait_on_metadata(topic, timer.timeout_ms)
|
||||
self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)
|
||||
|
||||
key_bytes = self._serialize(
|
||||
self.config['key_serializer'],
|
||||
topic, key)
|
||||
value_bytes = self._serialize(
|
||||
self.config['value_serializer'],
|
||||
topic, value)
|
||||
assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
key_bytes = self._serialize(
|
||||
self.config['key_serializer'],
|
||||
topic, key)
|
||||
value_bytes = self._serialize(
|
||||
self.config['value_serializer'],
|
||||
topic, value)
|
||||
assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))
|
||||
|
||||
assigned_partition = self._partition(topic, partition, key, value,
|
||||
key_bytes, value_bytes)
|
||||
if assigned_partition is None:
|
||||
raise Errors.KafkaTimeoutError("Failed to assign partition for message after %s secs." % timer.elapsed_ms / 1000)
|
||||
else:
|
||||
partition = assigned_partition
|
||||
partition = self._partition(topic, partition, key, value,
|
||||
key_bytes, value_bytes)
|
||||
|
||||
if headers is None:
|
||||
headers = []
|
||||
assert isinstance(headers, list)
|
||||
assert all(isinstance(item, tuple) and len(item) == 2 and isinstance(item[0], str) and isinstance(item[1], bytes) for item in headers)
|
||||
assert type(headers) == list
|
||||
assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers)
|
||||
|
||||
message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers)
|
||||
self._ensure_valid_record_size(message_size)
|
||||
|
||||
tp = TopicPartition(topic, partition)
|
||||
log.debug("%s: Sending (key=%r value=%r headers=%r) to %s", str(self), key, value, headers, tp)
|
||||
|
||||
if self._transaction_manager and self._transaction_manager.is_transactional():
|
||||
self._transaction_manager.maybe_add_partition_to_transaction(tp)
|
||||
|
||||
log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp)
|
||||
result = self._accumulator.append(tp, timestamp_ms,
|
||||
key_bytes, value_bytes, headers)
|
||||
key_bytes, value_bytes, headers,
|
||||
self.config['max_block_ms'],
|
||||
estimated_size=message_size)
|
||||
future, batch_is_full, new_batch_created = result
|
||||
if batch_is_full or new_batch_created:
|
||||
log.debug("%s: Waking up the sender since %s is either full or"
|
||||
" getting a new batch", str(self), tp)
|
||||
log.debug("Waking up the sender since %s is either full or"
|
||||
" getting a new batch", tp)
|
||||
self._sender.wakeup()
|
||||
|
||||
return future
|
||||
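A hedged example of the send() contract described above: headers must be a list of (str, bytes) tuples (message format v2, brokers >= 0.11), and the returned future resolves to a RecordMetadata; the broker address and topic are placeholders.

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')  # placeholder address
future = producer.send(
    'my-topic',
    key=b'user-42',
    value=b'{"action": "login"}',
    headers=[('trace-id', b'abc123'), ('source', b'web')],
)
metadata = future.get(timeout=10)  # blocks; raises on delivery failure
print(metadata.topic, metadata.partition, metadata.offset)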
@@ -882,7 +612,7 @@ class KafkaProducer(object):
|
||||
# for API exceptions return them in the future,
|
||||
# for other exceptions raise directly
|
||||
except Errors.BrokerResponseError as e:
|
||||
log.error("%s: Exception occurred during message send: %s", str(self), e)
|
||||
log.debug("Exception occurred during message send: %s", e)
|
||||
return FutureRecordMetadata(
|
||||
FutureProduceResult(TopicPartition(topic, partition)),
|
||||
-1, None, None,
|
||||
@@ -913,7 +643,7 @@ class KafkaProducer(object):
|
||||
KafkaTimeoutError: failure to flush buffered records within the
|
||||
provided timeout
|
||||
"""
|
||||
log.debug("%s: Flushing accumulated records in producer.", str(self))
|
||||
log.debug("Flushing accumulated records in producer.") # trace
|
||||
self._accumulator.begin_flush()
|
||||
self._sender.wakeup()
|
||||
self._accumulator.await_flush_completion(timeout=timeout)
|
||||
@@ -925,8 +655,13 @@ class KafkaProducer(object):
|
||||
"The message is %d bytes when serialized which is larger than"
|
||||
" the maximum request size you have configured with the"
|
||||
" max_request_size configuration" % (size,))
|
||||
if size > self.config['buffer_memory']:
|
||||
raise Errors.MessageSizeTooLargeError(
|
||||
"The message is %d bytes when serialized which is larger than"
|
||||
" the total memory buffer you have configured with the"
|
||||
" buffer_memory configuration." % (size,))
|
||||
|
||||
def _wait_on_metadata(self, topic, max_wait_ms):
|
||||
def _wait_on_metadata(self, topic, max_wait):
|
||||
"""
|
||||
Wait for cluster metadata including partitions for the given topic to
|
||||
be available.
|
||||
@@ -944,31 +679,32 @@ class KafkaProducer(object):
|
||||
"""
|
||||
# add topic to metadata topic list if it is not there already.
|
||||
self._sender.add_topic(topic)
|
||||
timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
|
||||
begin = time.time()
|
||||
elapsed = 0.0
|
||||
metadata_event = None
|
||||
while True:
|
||||
partitions = self._metadata.partitions_for_topic(topic)
|
||||
if partitions is not None:
|
||||
return partitions
|
||||
timer.maybe_raise()
|
||||
|
||||
if not metadata_event:
|
||||
metadata_event = threading.Event()
|
||||
|
||||
log.debug("%s: Requesting metadata update for topic %s", str(self), topic)
|
||||
log.debug("Requesting metadata update for topic %s", topic)
|
||||
|
||||
metadata_event.clear()
|
||||
future = self._metadata.request_update()
|
||||
future.add_both(lambda e, *args: e.set(), metadata_event)
|
||||
self._sender.wakeup()
|
||||
metadata_event.wait(timer.timeout_ms / 1000)
|
||||
if not future.is_done:
|
||||
metadata_event.wait(max_wait - elapsed)
|
||||
elapsed = time.time() - begin
|
||||
if not metadata_event.is_set():
|
||||
raise Errors.KafkaTimeoutError(
|
||||
"Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,))
|
||||
elif future.failed() and not future.retriable():
|
||||
raise future.exception
|
||||
"Failed to update metadata after %.1f secs." % (max_wait,))
|
||||
elif topic in self._metadata.unauthorized_topics:
|
||||
raise Errors.TopicAuthorizationFailedError(set([topic]))
|
||||
raise Errors.TopicAuthorizationFailedError(topic)
|
||||
else:
|
||||
log.debug("%s: _wait_on_metadata woke after %s secs.", str(self), timer.elapsed_ms / 1000)
|
||||
log.debug("_wait_on_metadata woke after %s secs.", elapsed)
|
||||
|
||||
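The metadata wait above surfaces to callers as KafkaTimeoutError once max_block_ms elapses; a hedged sketch of handling that at the send() call site, with placeholder addresses and topic.

from kafka import KafkaProducer
from kafka.errors import KafkaTimeoutError

producer = KafkaProducer(bootstrap_servers='localhost:9092', max_block_ms=10000)
try:
    producer.send('brand-new-topic', b'payload')
except KafkaTimeoutError:
    # Metadata for the topic (or buffer memory) was not available within max_block_ms.
    print('metadata not available within max_block_ms; will retry later')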
def _serialize(self, f, topic, data):
|
||||
if not f:
|
||||
@@ -979,18 +715,16 @@ class KafkaProducer(object):
|
||||
|
||||
def _partition(self, topic, partition, key, value,
|
||||
serialized_key, serialized_value):
|
||||
all_partitions = self._metadata.partitions_for_topic(topic)
|
||||
available = self._metadata.available_partitions_for_topic(topic)
|
||||
if all_partitions is None or available is None:
|
||||
return None
|
||||
if partition is not None:
|
||||
assert partition >= 0
|
||||
assert partition in all_partitions, 'Unrecognized partition'
|
||||
assert partition in self._metadata.partitions_for_topic(topic), 'Unrecognized partition'
|
||||
return partition
|
||||
|
||||
all_partitions = sorted(self._metadata.partitions_for_topic(topic))
|
||||
available = list(self._metadata.available_partitions_for_topic(topic))
|
||||
return self.config['partitioner'](serialized_key,
|
||||
sorted(all_partitions),
|
||||
list(available))
|
||||
all_partitions,
|
||||
available)
|
||||
|
||||
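A hedged sketch of how the partitioner callable is invoked with the serialized key plus the sorted and available partition lists, as in the code above; the partition ids are illustrative.

from kafka.partitioner.default import DefaultPartitioner

partitioner = DefaultPartitioner()
all_parts = [0, 1, 2, 3]
available = [0, 1, 2, 3]

p1 = partitioner(b'user-42', all_parts, available)
p2 = partitioner(b'user-42', all_parts, available)
assert p1 == p2  # same key -> same partition (murmur2 hash of the serialized key)
print(partitioner(None, all_parts, available))  # None key -> random available partition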
def metrics(self, raw=False):
|
||||
"""Get metrics on producer performance.
|
||||
@@ -1002,8 +736,6 @@ class KafkaProducer(object):
|
||||
This is an unstable interface. It may change in future
|
||||
releases without warning.
|
||||
"""
|
||||
if not self._metrics:
|
||||
return
|
||||
if raw:
|
||||
return self._metrics.metrics.copy()
|
||||
|
||||
@@ -1015,6 +747,3 @@ class KafkaProducer(object):
|
||||
metrics[k.group][k.name] = {}
|
||||
metrics[k.group][k.name] = v.value()
|
||||
return metrics
|
||||
|
||||
def __str__(self):
|
||||
return "<KafkaProducer client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id'])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import, division
|
||||
from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
import copy
|
||||
@@ -6,14 +6,8 @@ import logging
|
||||
import threading
|
||||
import time
|
||||
|
||||
try:
|
||||
# enum in stdlib as of py3.4
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.producer.buffer import SimpleBufferPool
|
||||
from kafka.producer.future import FutureRecordMetadata, FutureProduceResult
|
||||
from kafka.record.memory_records import MemoryRecordsBuilder
|
||||
from kafka.structs import TopicPartition
|
||||
@@ -41,16 +35,10 @@ class AtomicInteger(object):
|
||||
return self._val
|
||||
|
||||
|
||||
class FinalState(IntEnum):
|
||||
ABORTED = 0
|
||||
FAILED = 1
|
||||
SUCCEEDED = 2
|
||||
|
||||
|
||||
class ProducerBatch(object):
|
||||
def __init__(self, tp, records, now=None):
|
||||
now = time.time() if now is None else now
|
||||
def __init__(self, tp, records, buffer):
|
||||
self.max_record_size = 0
|
||||
now = time.time()
|
||||
self.created = now
|
||||
self.drained = None
|
||||
self.attempts = 0
|
||||
@@ -60,120 +48,81 @@ class ProducerBatch(object):
|
||||
self.topic_partition = tp
|
||||
self.produce_future = FutureProduceResult(tp)
|
||||
self._retry = False
|
||||
self._final_state = None
|
||||
|
||||
@property
|
||||
def final_state(self):
|
||||
return self._final_state
|
||||
self._buffer = buffer # We only save it, we don't write to it
|
||||
|
||||
@property
|
||||
def record_count(self):
|
||||
return self.records.next_offset()
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self.records.producer_id if self.records else None
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self.records.producer_epoch if self.records else None
|
||||
|
||||
@property
|
||||
def has_sequence(self):
|
||||
return self.records.has_sequence if self.records else False
|
||||
|
||||
def try_append(self, timestamp_ms, key, value, headers, now=None):
|
||||
def try_append(self, timestamp_ms, key, value, headers):
|
||||
metadata = self.records.append(timestamp_ms, key, value, headers)
|
||||
if metadata is None:
|
||||
return None
|
||||
|
||||
now = time.time() if now is None else now
|
||||
self.max_record_size = max(self.max_record_size, metadata.size)
|
||||
self.last_append = now
|
||||
future = FutureRecordMetadata(
|
||||
self.produce_future,
|
||||
metadata.offset,
|
||||
metadata.timestamp,
|
||||
metadata.crc,
|
||||
len(key) if key is not None else -1,
|
||||
len(value) if value is not None else -1,
|
||||
sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1)
|
||||
self.last_append = time.time()
|
||||
future = FutureRecordMetadata(self.produce_future, metadata.offset,
|
||||
metadata.timestamp, metadata.crc,
|
||||
len(key) if key is not None else -1,
|
||||
len(value) if value is not None else -1,
|
||||
sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1)
|
||||
return future
|
||||
|
||||
def abort(self, exception):
|
||||
"""Abort the batch and complete the future and callbacks."""
|
||||
if self._final_state is not None:
|
||||
raise Errors.IllegalStateError("Batch has already been completed in final state: %s" % self._final_state)
|
||||
self._final_state = FinalState.ABORTED
|
||||
|
||||
log.debug("Aborting batch for partition %s: %s", self.topic_partition, exception)
|
||||
self._complete_future(-1, -1, exception)
|
||||
|
||||
def done(self, base_offset=None, timestamp_ms=None, exception=None):
|
||||
"""
|
||||
Finalize the state of a batch. Final state, once set, is immutable. This function may be called
|
||||
once or twice on a batch. It may be called twice if
|
||||
1. An inflight batch expires before a response from the broker is received. The batch's final
|
||||
state is set to FAILED. But it could succeed on the broker and second time around batch.done() may
|
||||
try to set SUCCEEDED final state.
|
||||
|
||||
2. If a transaction abortion happens or if the producer is closed forcefully, the final state is
|
||||
ABORTED but again it could succeed if broker responds with a success.
|
||||
|
||||
Attempted transitions from [FAILED | ABORTED] --> SUCCEEDED are logged.
|
||||
Attempted transitions from one failure state to the same or a different failed state are ignored.
|
||||
Attempted transitions from SUCCEEDED to the same or a failed state throw an exception.
|
||||
"""
|
||||
final_state = FinalState.SUCCEEDED if exception is None else FinalState.FAILED
|
||||
if self._final_state is None:
|
||||
self._final_state = final_state
|
||||
if final_state is FinalState.SUCCEEDED:
|
||||
log.debug("Successfully produced messages to %s with base offset %s", self.topic_partition, base_offset)
|
||||
else:
|
||||
log.warning("Failed to produce messages to topic-partition %s with base offset %s: %s",
|
||||
self.topic_partition, base_offset, exception)
|
||||
self._complete_future(base_offset, timestamp_ms, exception)
|
||||
return True
|
||||
|
||||
elif self._final_state is not FinalState.SUCCEEDED:
|
||||
if final_state is FinalState.SUCCEEDED:
|
||||
# Log if a previously unsuccessful batch succeeded later on.
|
||||
log.debug("ProduceResponse returned %s for %s after batch with base offset %s had already been %s.",
|
||||
final_state, self.topic_partition, base_offset, self._final_state)
|
||||
else:
|
||||
# FAILED --> FAILED and ABORTED --> FAILED transitions are ignored.
|
||||
log.debug("Ignored state transition %s -> %s for %s batch with base offset %s",
|
||||
self._final_state, final_state, self.topic_partition, base_offset)
|
||||
else:
|
||||
# A SUCCESSFUL batch must not attempt another state change.
|
||||
raise Errors.IllegalStateError("A %s batch must not attempt another state change to %s" % (self._final_state, final_state))
|
||||
return False
|
||||
|
||||
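To make the documented transitions concrete, a hedged standalone restatement of the rules (mirroring the FinalState enum defined earlier in this diff); it is not the ProducerBatch implementation itself.

from enum import IntEnum

class FinalState(IntEnum):
    ABORTED = 0
    FAILED = 1
    SUCCEEDED = 2

def transition(current, requested):
    """Resulting final state for a completion attempt, per the rules above."""
    if current is None:
        return requested                      # first completion wins
    if current is not FinalState.SUCCEEDED:
        return current                        # FAILED/ABORTED: later outcomes are only logged
    raise RuntimeError('a SUCCEEDED batch must not change state to %s' % (requested,))

assert transition(None, FinalState.SUCCEEDED) is FinalState.SUCCEEDED
assert transition(FinalState.FAILED, FinalState.SUCCEEDED) is FinalState.FAILED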
def _complete_future(self, base_offset, timestamp_ms, exception):
|
||||
def done(self, base_offset=None, timestamp_ms=None, exception=None, log_start_offset=None, global_error=None):
|
||||
level = logging.DEBUG if exception is None else logging.WARNING
|
||||
log.log(level, "Produced messages to topic-partition %s with base offset"
|
||||
" %s log start offset %s and error %s.", self.topic_partition, base_offset,
|
||||
log_start_offset, global_error) # trace
|
||||
if self.produce_future.is_done:
|
||||
raise Errors.IllegalStateError('Batch is already closed!')
|
||||
log.warning('Batch is already closed -- ignoring batch.done()')
|
||||
return
|
||||
elif exception is None:
|
||||
self.produce_future.success((base_offset, timestamp_ms))
|
||||
self.produce_future.success((base_offset, timestamp_ms, log_start_offset))
|
||||
else:
|
||||
self.produce_future.failure(exception)
|
||||
|
||||
def has_reached_delivery_timeout(self, delivery_timeout_ms, now=None):
|
||||
now = time.time() if now is None else now
|
||||
return delivery_timeout_ms / 1000 <= now - self.created
|
||||
def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full):
|
||||
"""Expire batches if metadata is not available
|
||||
|
||||
A batch whose metadata is not available should be expired if one
|
||||
of the following is true:
|
||||
|
||||
* the batch is not in retry AND request timeout has elapsed after
|
||||
it is ready (full or linger.ms has reached).
|
||||
|
||||
* the batch is in retry AND request timeout has elapsed after the
|
||||
backoff period ended.
|
||||
"""
|
||||
now = time.time()
|
||||
since_append = now - self.last_append
|
||||
since_ready = now - (self.created + linger_ms / 1000.0)
|
||||
since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
|
||||
timeout = request_timeout_ms / 1000.0
|
||||
|
||||
error = None
|
||||
if not self.in_retry() and is_full and timeout < since_append:
|
||||
error = "%d seconds have passed since last append" % (since_append,)
|
||||
elif not self.in_retry() and timeout < since_ready:
|
||||
error = "%d seconds have passed since batch creation plus linger time" % (since_ready,)
|
||||
elif self.in_retry() and timeout < since_backoff:
|
||||
error = "%d seconds have passed since last attempt plus backoff time" % (since_backoff,)
|
||||
|
||||
if error:
|
||||
self.records.close()
|
||||
self.done(-1, None, Errors.KafkaTimeoutError(
|
||||
"Batch for %s containing %s record(s) expired: %s" % (
|
||||
self.topic_partition, self.records.next_offset(), error)))
|
||||
return True
|
||||
return False
|
||||
|
||||
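A small worked example of the timing arithmetic above for a non-retried batch that is not full; the values are illustrative and expressed in seconds.

request_timeout = 30.0    # request_timeout_ms / 1000
linger = 0.005            # linger_ms / 1000
created = 100.0           # batch creation time
now = 135.0

since_ready = now - (created + linger)   # time since the batch became sendable
print(since_ready > request_timeout)     # True -> batch is expired with KafkaTimeoutError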
def in_retry(self):
|
||||
return self._retry
|
||||
|
||||
def retry(self, now=None):
|
||||
now = time.time() if now is None else now
|
||||
def set_retry(self):
|
||||
self._retry = True
|
||||
self.attempts += 1
|
||||
self.last_attempt = now
|
||||
self.last_append = now
|
||||
|
||||
@property
|
||||
def is_done(self):
|
||||
return self.produce_future.is_done
|
||||
def buffer(self):
|
||||
return self._buffer
|
||||
|
||||
def __str__(self):
|
||||
return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
|
||||
@@ -194,6 +143,12 @@ class RecordAccumulator(object):
|
||||
A small batch size will make batching less common and may reduce
|
||||
throughput (a batch size of zero will disable batching entirely).
|
||||
Default: 16384
|
||||
buffer_memory (int): The total bytes of memory the producer should use
|
||||
to buffer records waiting to be sent to the server. If records are
|
||||
sent faster than they can be delivered to the server the producer
|
||||
will block up to max_block_ms, raising an exception on timeout.
|
||||
In the current implementation, this setting is an approximation.
|
||||
Default: 33554432 (32MB)
|
||||
compression_attrs (int): The compression type for all data generated by
|
||||
the producer. Valid values are gzip(1), snappy(2), lz4(3), or
|
||||
none(0).
|
||||
@@ -201,7 +156,7 @@ class RecordAccumulator(object):
|
||||
will also impact the compression ratio (more batching means better
|
||||
compression). Default: None.
|
||||
linger_ms (int): An artificial delay time to add before declaring a
|
||||
record batch (that isn't full) ready for sending. This allows
|
||||
messageset (that isn't full) ready for sending. This allows
|
||||
time for more records to arrive. Setting a non-zero linger_ms
|
||||
will trade off some latency for potentially better throughput
|
||||
due to more batching (and hence fewer, larger requests).
|
||||
@@ -211,14 +166,14 @@ class RecordAccumulator(object):
|
||||
all retries in a short period of time. Default: 100
|
||||
"""
|
||||
DEFAULT_CONFIG = {
|
||||
'buffer_memory': 33554432,
|
||||
'batch_size': 16384,
|
||||
'compression_attrs': 0,
|
||||
'linger_ms': 0,
|
||||
'request_timeout_ms': 30000,
|
||||
'delivery_timeout_ms': 120000,
|
||||
'retry_backoff_ms': 100,
|
||||
'transaction_manager': None,
|
||||
'message_version': 2,
|
||||
'message_version': 0,
|
||||
'metrics': None,
|
||||
'metric_group_prefix': 'producer-metrics',
|
||||
}
|
||||
|
||||
def __init__(self, **configs):
|
||||
@@ -228,37 +183,22 @@ class RecordAccumulator(object):
|
||||
self.config[key] = configs.pop(key)
|
||||
|
||||
self._closed = False
|
||||
self._transaction_manager = self.config['transaction_manager']
|
||||
self._flushes_in_progress = AtomicInteger()
|
||||
self._appends_in_progress = AtomicInteger()
|
||||
self._batches = collections.defaultdict(collections.deque) # TopicPartition: [ProducerBatch]
|
||||
self._tp_locks = {None: threading.Lock()} # TopicPartition: Lock, plus a lock to add entries
|
||||
self._free = SimpleBufferPool(self.config['buffer_memory'],
|
||||
self.config['batch_size'],
|
||||
metrics=self.config['metrics'],
|
||||
metric_group_prefix=self.config['metric_group_prefix'])
|
||||
self._incomplete = IncompleteProducerBatches()
|
||||
# The following variables should only be accessed by the sender thread,
|
||||
# so we don't need to protect them w/ locking.
|
||||
self.muted = set()
|
||||
self._drain_index = 0
|
||||
self._next_batch_expiry_time_ms = float('inf')
|
||||
|
||||
if self.config['delivery_timeout_ms'] < self.config['linger_ms'] + self.config['request_timeout_ms']:
|
||||
raise Errors.KafkaConfigurationError("Must set delivery_timeout_ms higher than linger_ms + request_timeout_ms")
|
||||
|
||||
@property
|
||||
def delivery_timeout_ms(self):
|
||||
return self.config['delivery_timeout_ms']
|
||||
|
||||
@property
|
||||
def next_expiry_time_ms(self):
|
||||
return self._next_batch_expiry_time_ms
|
||||
|
||||
def _tp_lock(self, tp):
|
||||
if tp not in self._tp_locks:
|
||||
with self._tp_locks[None]:
|
||||
if tp not in self._tp_locks:
|
||||
self._tp_locks[tp] = threading.Lock()
|
||||
return self._tp_locks[tp]
|
||||
|
||||
def append(self, tp, timestamp_ms, key, value, headers, now=None):
|
||||
def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms,
|
||||
estimated_size=0):
|
||||
"""Add a record to the accumulator, return the append result.
|
||||
|
||||
The append result will contain the future metadata, and flag for
|
||||
@@ -271,53 +211,59 @@ class RecordAccumulator(object):
|
||||
key (bytes): The key for the record
|
||||
value (bytes): The value for the record
|
||||
headers (List[Tuple[str, bytes]]): The header fields for the record
|
||||
max_time_to_block_ms (int): The maximum time in milliseconds to
|
||||
block for buffer memory to be available
|
||||
|
||||
Returns:
|
||||
tuple: (future, batch_is_full, new_batch_created)
|
||||
"""
|
||||
assert isinstance(tp, TopicPartition), 'not TopicPartition'
|
||||
assert not self._closed, 'RecordAccumulator is closed'
|
||||
now = time.time() if now is None else now
# We keep track of the number of appending threads to make sure we do
# not miss batches in abortIncompleteBatches().
self._appends_in_progress.increment()
try:
|
||||
with self._tp_lock(tp):
|
||||
if tp not in self._tp_locks:
|
||||
with self._tp_locks[None]:
|
||||
if tp not in self._tp_locks:
|
||||
self._tp_locks[tp] = threading.Lock()
|
||||
|
||||
with self._tp_locks[tp]:
|
||||
# check if we have an in-progress batch
|
||||
dq = self._batches[tp]
|
||||
if dq:
|
||||
last = dq[-1]
|
||||
future = last.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
future = last.try_append(timestamp_ms, key, value, headers)
|
||||
if future is not None:
|
||||
batch_is_full = len(dq) > 1 or last.records.is_full()
|
||||
return future, batch_is_full, False
|
||||
|
||||
with self._tp_lock(tp):
|
||||
size = max(self.config['batch_size'], estimated_size)
|
||||
log.debug("Allocating a new %d byte message buffer for %s", size, tp) # trace
|
||||
buf = self._free.allocate(size, max_time_to_block_ms)
|
||||
with self._tp_locks[tp]:
|
||||
# Need to check if producer is closed again after grabbing the
|
||||
# dequeue lock.
|
||||
assert not self._closed, 'RecordAccumulator is closed'
|
||||
|
||||
if dq:
|
||||
last = dq[-1]
|
||||
future = last.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
future = last.try_append(timestamp_ms, key, value, headers)
|
||||
if future is not None:
|
||||
# Somebody else found us a batch, return the one we
|
||||
# waited for! Hopefully this doesn't happen often...
|
||||
self._free.deallocate(buf)
|
||||
batch_is_full = len(dq) > 1 or last.records.is_full()
|
||||
return future, batch_is_full, False
|
||||
|
||||
if self._transaction_manager and self.config['message_version'] < 2:
|
||||
raise Errors.UnsupportedVersionError("Attempting to use idempotence with a broker which"
|
||||
" does not support the required message format (v2)."
|
||||
" The broker must be version 0.11 or later.")
|
||||
records = MemoryRecordsBuilder(
|
||||
self.config['message_version'],
|
||||
self.config['compression_attrs'],
|
||||
self.config['batch_size']
|
||||
)
|
||||
|
||||
batch = ProducerBatch(tp, records, now=now)
|
||||
future = batch.try_append(timestamp_ms, key, value, headers, now=now)
|
||||
batch = ProducerBatch(tp, records, buf)
|
||||
future = batch.try_append(timestamp_ms, key, value, headers)
|
||||
if not future:
|
||||
raise Exception()
|
||||
|
||||
@@ -328,43 +274,79 @@ class RecordAccumulator(object):
|
||||
finally:
|
||||
self._appends_in_progress.decrement()
|
||||
|
||||
def reset_next_batch_expiry_time(self):
|
||||
self._next_batch_expiry_time_ms = float('inf')
|
||||
def abort_expired_batches(self, request_timeout_ms, cluster):
|
||||
"""Abort the batches that have been sitting in RecordAccumulator for
|
||||
more than the configured request_timeout due to metadata being
|
||||
unavailable.
|
||||
|
||||
def maybe_update_next_batch_expiry_time(self, batch):
|
||||
self._next_batch_expiry_time_ms = min(self._next_batch_expiry_time_ms, batch.created * 1000 + self.delivery_timeout_ms)
|
||||
Arguments:
|
||||
request_timeout_ms (int): milliseconds to timeout
|
||||
cluster (ClusterMetadata): current metadata for kafka cluster
|
||||
|
||||
def expired_batches(self, now=None):
|
||||
"""Get a list of batches which have been sitting in the accumulator too long and need to be expired."""
|
||||
Returns:
|
||||
list of ProducerBatch that were expired
|
||||
"""
|
||||
expired_batches = []
|
||||
to_remove = []
|
||||
count = 0
|
||||
for tp in list(self._batches.keys()):
|
||||
with self._tp_lock(tp):
|
||||
assert tp in self._tp_locks, 'TopicPartition not in locks dict'
|
||||
|
||||
# We only check if the batch should be expired if the partition
# does not have a batch in flight. This is to avoid later
# batches getting expired when an earlier batch is still in progress.
# This protection only takes effect when the user sets
# max.in.flight.requests.per.connection=1. Otherwise the expiration
# order is not guaranteed.
if tp in self.muted:
continue
|
||||
|
||||
with self._tp_locks[tp]:
|
||||
# iterate over the batches and expire them if they have stayed
|
||||
# in accumulator for more than request_timeout_ms
|
||||
dq = self._batches[tp]
|
||||
while dq:
|
||||
batch = dq[0]
|
||||
if batch.has_reached_delivery_timeout(self.delivery_timeout_ms, now=now):
|
||||
dq.popleft()
|
||||
batch.records.close()
|
||||
for batch in dq:
is_full = bool(batch != dq[-1] or batch.records.is_full())
|
||||
# check if the batch is expired
|
||||
if batch.maybe_expire(request_timeout_ms,
|
||||
self.config['retry_backoff_ms'],
|
||||
self.config['linger_ms'],
|
||||
is_full):
|
||||
expired_batches.append(batch)
|
||||
to_remove.append(batch)
|
||||
count += 1
|
||||
self.deallocate(batch)
|
||||
else:
|
||||
# Stop at the first batch that has not expired.
|
||||
self.maybe_update_next_batch_expiry_time(batch)
|
||||
break
|
||||
|
||||
# Python does not allow us to mutate the dq during iteration
|
||||
# Assuming expired batches are infrequent, this is better than
|
||||
# creating a new copy of the deque for iteration on every loop
|
||||
if to_remove:
|
||||
for batch in to_remove:
|
||||
dq.remove(batch)
|
||||
to_remove = []
|
||||
|
||||
if expired_batches:
|
||||
log.warning("Expired %d batches in accumulator", count) # trace
|
||||
|
||||
return expired_batches
|
||||
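As a minimal illustration of the delivery-timeout check that drives this loop (a sketch, assuming batch creation times are unix timestamps in seconds as elsewhere in this module):

import time

def has_reached_delivery_timeout(batch_created, delivery_timeout_ms, now=None):
    """Return True if a batch created at `batch_created` (seconds) has sat
    in the accumulator longer than delivery_timeout_ms."""
    now = time.time() if now is None else now
    return delivery_timeout_ms / 1000 <= now - batch_created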
|
||||
def reenqueue(self, batch, now=None):
|
||||
"""
|
||||
Re-enqueue the given record batch in the accumulator. In Sender._complete_batch method, we check
|
||||
whether the batch has reached delivery_timeout_ms or not. Hence we do not do the delivery timeout check here.
|
||||
"""
|
||||
batch.retry(now=now)
|
||||
with self._tp_lock(batch.topic_partition):
|
||||
dq = self._batches[batch.topic_partition]
|
||||
def reenqueue(self, batch):
|
||||
"""Re-enqueue the given record batch in the accumulator to retry."""
|
||||
now = time.time()
|
||||
batch.attempts += 1
|
||||
batch.last_attempt = now
|
||||
batch.last_append = now
|
||||
batch.set_retry()
|
||||
assert batch.topic_partition in self._tp_locks, 'TopicPartition not in locks dict'
|
||||
assert batch.topic_partition in self._batches, 'TopicPartition not in batches'
|
||||
dq = self._batches[batch.topic_partition]
|
||||
with self._tp_locks[batch.topic_partition]:
|
||||
dq.appendleft(batch)
|
||||
|
||||
def ready(self, cluster, now=None):
|
||||
def ready(self, cluster):
|
||||
"""
|
||||
Get a list of nodes whose partitions are ready to be sent, and the
|
||||
earliest time at which any non-sendable partition will be ready;
|
||||
@@ -398,8 +380,9 @@ class RecordAccumulator(object):
|
||||
ready_nodes = set()
|
||||
next_ready_check = 9999999.99
|
||||
unknown_leaders_exist = False
|
||||
now = time.time() if now is None else now
|
||||
now = time.time()
|
||||
|
||||
exhausted = bool(self._free.queued() > 0)
|
||||
# several threads are accessing self._batches -- to simplify
|
||||
# concurrent access, we iterate over a snapshot of partitions
|
||||
# and lock each partition separately as needed
|
||||
@@ -414,23 +397,23 @@ class RecordAccumulator(object):
|
||||
elif tp in self.muted:
|
||||
continue
|
||||
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if not dq:
|
||||
continue
|
||||
batch = dq[0]
|
||||
retry_backoff = self.config['retry_backoff_ms'] / 1000
|
||||
linger = self.config['linger_ms'] / 1000
|
||||
backing_off = bool(batch.attempts > 0
|
||||
and (batch.last_attempt + retry_backoff) > now)
|
||||
retry_backoff = self.config['retry_backoff_ms'] / 1000.0
|
||||
linger = self.config['linger_ms'] / 1000.0
|
||||
backing_off = bool(batch.attempts > 0 and
|
||||
batch.last_attempt + retry_backoff > now)
|
||||
waited_time = now - batch.last_attempt
|
||||
time_to_wait = retry_backoff if backing_off else linger
|
||||
time_left = max(time_to_wait - waited_time, 0)
|
||||
full = bool(len(dq) > 1 or batch.records.is_full())
|
||||
expired = bool(waited_time >= time_to_wait)
|
||||
|
||||
sendable = (full or expired or self._closed or
|
||||
self.flush_in_progress())
|
||||
sendable = (full or expired or exhausted or self._closed or
|
||||
self._flush_in_progress())
|
||||
|
||||
if sendable and not backing_off:
|
||||
ready_nodes.add(leader)
|
||||
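The readiness test above boils down to a handful of booleans; here is a compact, self-contained restatement (times in seconds, parameter names illustrative, mirroring the logic in the hunk above):

def is_sendable(batch_attempts, last_attempt, now, retry_backoff, linger,
                queue_depth, batch_full, exhausted, closed, flush_in_progress):
    # Backing off: a retried batch must wait retry_backoff after its last attempt.
    backing_off = batch_attempts > 0 and last_attempt + retry_backoff > now
    waited = now - last_attempt
    time_to_wait = retry_backoff if backing_off else linger
    expired = waited >= time_to_wait
    full = queue_depth > 1 or batch_full
    sendable = full or expired or exhausted or closed or flush_in_progress
    return sendable and not backing_off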
@@ -444,98 +427,16 @@ class RecordAccumulator(object):
|
||||
|
||||
return ready_nodes, next_ready_check, unknown_leaders_exist
|
||||
|
||||
def has_undrained(self):
|
||||
"""Check whether there are any batches which haven't been drained"""
|
||||
def has_unsent(self):
|
||||
"""Return whether there is any unsent record in the accumulator."""
|
||||
for tp in list(self._batches.keys()):
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if len(dq):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _should_stop_drain_batches_for_partition(self, first, tp):
|
||||
if self._transaction_manager:
|
||||
if not self._transaction_manager.is_send_to_partition_allowed(tp):
|
||||
return True
|
||||
if not self._transaction_manager.producer_id_and_epoch.is_valid:
|
||||
# we cannot send the batch until we have refreshed the PID
|
||||
log.debug("Waiting to send ready batches because transaction producer id is not valid")
|
||||
return True
|
||||
return False
|
||||
|
||||
def drain_batches_for_one_node(self, cluster, node_id, max_size, now=None):
|
||||
now = time.time() if now is None else now
|
||||
size = 0
|
||||
ready = []
|
||||
partitions = list(cluster.partitions_for_broker(node_id))
|
||||
if not partitions:
|
||||
return ready
|
||||
# to make starvation less likely this loop doesn't start at 0
|
||||
self._drain_index %= len(partitions)
|
||||
start = None
|
||||
while start != self._drain_index:
|
||||
tp = partitions[self._drain_index]
|
||||
if start is None:
|
||||
start = self._drain_index
|
||||
self._drain_index += 1
|
||||
self._drain_index %= len(partitions)
|
||||
|
||||
# Only proceed if the partition has no in-flight batches.
|
||||
if tp in self.muted:
|
||||
continue
|
||||
|
||||
if tp not in self._batches:
|
||||
continue
|
||||
|
||||
with self._tp_lock(tp):
|
||||
dq = self._batches[tp]
|
||||
if len(dq) == 0:
|
||||
continue
|
||||
first = dq[0]
|
||||
backoff = bool(first.attempts > 0 and
|
||||
first.last_attempt + self.config['retry_backoff_ms'] / 1000 > now)
|
||||
# Only drain the batch if it is not during backoff
|
||||
if backoff:
|
||||
continue
|
||||
|
||||
if (size + first.records.size_in_bytes() > max_size
|
||||
and len(ready) > 0):
|
||||
# there is a rare case that a single batch
|
||||
# size is larger than the request size due
|
||||
# to compression; in this case we will
|
||||
# still eventually send this batch in a
|
||||
# single request
|
||||
break
|
||||
else:
|
||||
if self._should_stop_drain_batches_for_partition(first, tp):
|
||||
break
|
||||
|
||||
batch = dq.popleft()
|
||||
if self._transaction_manager and not batch.in_retry():
|
||||
# If the batch is in retry, then we should not change the pid and
|
||||
# sequence number, since this may introduce duplicates. In particular,
|
||||
# the previous attempt may actually have been accepted, and if we change
|
||||
# the pid and sequence here, this attempt will also be accepted, causing
|
||||
# a duplicate.
|
||||
sequence_number = self._transaction_manager.sequence_number(batch.topic_partition)
|
||||
log.debug("Dest: %s: %s producer_id=%s epoch=%s sequence=%s",
|
||||
node_id, batch.topic_partition,
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch,
|
||||
sequence_number)
|
||||
batch.records.set_producer_state(
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch,
|
||||
sequence_number,
|
||||
self._transaction_manager.is_transactional()
|
||||
)
|
||||
batch.records.close()
|
||||
size += batch.records.size_in_bytes()
|
||||
ready.append(batch)
|
||||
batch.drained = now
|
||||
return ready
|
||||
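The rotating _drain_index above is a small round-robin device so the partitions at the front of the list do not starve the rest. In isolation the idea looks like this (a sketch of the pattern, not the accumulator's exact bookkeeping):

class RoundRobinDrainer(object):
    def __init__(self):
        self._drain_index = 0

    def order(self, partitions):
        """Yield partitions starting at a rotating offset so that, across
        repeated drain calls, every partition eventually goes first."""
        if not partitions:
            return
        self._drain_index %= len(partitions)
        start = self._drain_index
        while True:
            yield partitions[self._drain_index]
            self._drain_index = (self._drain_index + 1) % len(partitions)
            if self._drain_index == start:
                break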
|
||||
def drain(self, cluster, nodes, max_size, now=None):
|
||||
def drain(self, cluster, nodes, max_size):
|
||||
"""
|
||||
Drain all the data for the given nodes and collate them into a list of
|
||||
batches that will fit within the specified size on a per-node basis.
|
||||
@@ -553,17 +454,59 @@ class RecordAccumulator(object):
|
||||
if not nodes:
|
||||
return {}
|
||||
|
||||
now = time.time() if now is None else now
|
||||
now = time.time()
|
||||
batches = {}
|
||||
for node_id in nodes:
|
||||
batches[node_id] = self.drain_batches_for_one_node(cluster, node_id, max_size, now=now)
|
||||
size = 0
|
||||
partitions = list(cluster.partitions_for_broker(node_id))
|
||||
ready = []
|
||||
# to make starvation less likely this loop doesn't start at 0
|
||||
self._drain_index %= len(partitions)
|
||||
start = self._drain_index
|
||||
while True:
|
||||
tp = partitions[self._drain_index]
|
||||
if tp in self._batches and tp not in self.muted:
|
||||
with self._tp_locks[tp]:
|
||||
dq = self._batches[tp]
|
||||
if dq:
|
||||
first = dq[0]
|
||||
backoff = (
|
||||
bool(first.attempts > 0) and
|
||||
bool(first.last_attempt +
|
||||
self.config['retry_backoff_ms'] / 1000.0
|
||||
> now)
|
||||
)
|
||||
# Only drain the batch if it is not during backoff
|
||||
if not backoff:
|
||||
if (size + first.records.size_in_bytes() > max_size
|
||||
and len(ready) > 0):
|
||||
# there is a rare case that a single batch
|
||||
# size is larger than the request size due
|
||||
# to compression; in this case we will
|
||||
# still eventually send this batch in a
|
||||
# single request
|
||||
break
|
||||
else:
|
||||
batch = dq.popleft()
|
||||
batch.records.close()
|
||||
size += batch.records.size_in_bytes()
|
||||
ready.append(batch)
|
||||
batch.drained = now
|
||||
|
||||
self._drain_index += 1
|
||||
self._drain_index %= len(partitions)
|
||||
if start == self._drain_index:
|
||||
break
|
||||
|
||||
batches[node_id] = ready
|
||||
return batches
|
||||
|
||||
def deallocate(self, batch):
|
||||
"""Deallocate the record batch."""
|
||||
self._incomplete.remove(batch)
|
||||
self._free.deallocate(batch.buffer())
|
||||
|
||||
def flush_in_progress(self):
|
||||
def _flush_in_progress(self):
|
||||
"""Are there any threads currently waiting on a flush?"""
|
||||
return self._flushes_in_progress.get() > 0
|
||||
|
||||
@@ -592,10 +535,6 @@ class RecordAccumulator(object):
|
||||
finally:
|
||||
self._flushes_in_progress.decrement()
|
||||
|
||||
@property
|
||||
def has_incomplete(self):
|
||||
return bool(self._incomplete)
|
||||
|
||||
def abort_incomplete_batches(self):
|
||||
"""
|
||||
This function is only called when sender is closed forcefully. It will fail all the
|
||||
@@ -605,41 +544,27 @@ class RecordAccumulator(object):
|
||||
# 1. Avoid losing batches.
|
||||
# 2. Free up memory in case appending threads are blocked on buffer full.
|
||||
# This is a tight loop but should be able to get through very quickly.
|
||||
error = Errors.IllegalStateError("Producer is closed forcefully.")
|
||||
while True:
|
||||
self._abort_batches(error)
|
||||
self._abort_batches()
|
||||
if not self._appends_in_progress.get():
|
||||
break
|
||||
# After this point, no thread will append any messages because they will see the close
# flag set. We need to do the last abort after no thread was appending, in case there was a new
# batch appended by the last appending thread.
|
||||
self._abort_batches(error)
|
||||
self._abort_batches()
|
||||
self._batches.clear()
|
||||
|
||||
def _abort_batches(self, error):
|
||||
def _abort_batches(self):
|
||||
"""Go through incomplete batches and abort them."""
|
||||
error = Errors.IllegalStateError("Producer is closed forcefully.")
|
||||
for batch in self._incomplete.all():
|
||||
tp = batch.topic_partition
|
||||
# Close the batch before aborting
|
||||
with self._tp_lock(tp):
|
||||
with self._tp_locks[tp]:
|
||||
batch.records.close()
|
||||
self._batches[tp].remove(batch)
|
||||
batch.abort(error)
|
||||
batch.done(exception=error)
|
||||
self.deallocate(batch)
|
||||
|
||||
def abort_undrained_batches(self, error):
|
||||
for batch in self._incomplete.all():
|
||||
tp = batch.topic_partition
|
||||
with self._tp_lock(tp):
|
||||
aborted = False
|
||||
if not batch.is_done:
|
||||
aborted = True
|
||||
batch.records.close()
|
||||
self._batches[tp].remove(batch)
|
||||
if aborted:
|
||||
batch.abort(error)
|
||||
self.deallocate(batch)
|
||||
|
||||
def close(self):
|
||||
"""Close this accumulator and force all the record buffers to be drained."""
|
||||
self._closed = True
|
||||
@@ -654,21 +579,12 @@ class IncompleteProducerBatches(object):
|
||||
|
||||
def add(self, batch):
|
||||
with self._lock:
|
||||
self._incomplete.add(batch)
|
||||
return self._incomplete.add(batch)
|
||||
|
||||
def remove(self, batch):
|
||||
with self._lock:
|
||||
try:
|
||||
self._incomplete.remove(batch)
|
||||
except KeyError:
|
||||
pass
|
||||
return self._incomplete.remove(batch)
|
||||
|
||||
def all(self):
|
||||
with self._lock:
|
||||
return list(self._incomplete)
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self._incomplete)
|
||||
|
||||
|
||||
__nonzero__ = __bool__
|
||||
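(The __nonzero__ = __bool__ line above is the usual Python 2/3 truthiness shim for this thread-safe "incomplete batches" set.)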
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import, division
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import heapq
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
@@ -12,8 +11,6 @@ from kafka.vendor import six
|
||||
from kafka import errors as Errors
|
||||
from kafka.metrics.measurable import AnonMeasurable
|
||||
from kafka.metrics.stats import Avg, Max, Rate
|
||||
from kafka.producer.transaction_manager import ProducerIdAndEpoch
|
||||
from kafka.protocol.init_producer_id import InitProducerIdRequest
|
||||
from kafka.protocol.produce import ProduceRequest
|
||||
from kafka.structs import TopicPartition
|
||||
from kafka.version import __version__
|
||||
@@ -30,18 +27,14 @@ class Sender(threading.Thread):
|
||||
DEFAULT_CONFIG = {
|
||||
'max_request_size': 1048576,
|
||||
'acks': 1,
|
||||
'retries': float('inf'),
|
||||
'retries': 0,
|
||||
'request_timeout_ms': 30000,
|
||||
'retry_backoff_ms': 100,
|
||||
'metrics': None,
|
||||
'guarantee_message_order': False,
|
||||
'transaction_manager': None,
|
||||
'transactional_id': None,
|
||||
'transaction_timeout_ms': 60000,
|
||||
'client_id': 'kafka-python-' + __version__,
|
||||
'api_version': (0, 8, 0),
|
||||
}
|
||||
|
||||
def __init__(self, client, metadata, accumulator, **configs):
|
||||
def __init__(self, client, metadata, accumulator, metrics, **configs):
|
||||
super(Sender, self).__init__()
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
for key in self.config:
|
||||
@@ -55,75 +48,32 @@ class Sender(threading.Thread):
|
||||
self._running = True
|
||||
self._force_close = False
|
||||
self._topics_to_add = set()
|
||||
if self.config['metrics']:
|
||||
self._sensors = SenderMetrics(self.config['metrics'], self._client, self._metadata)
|
||||
else:
|
||||
self._sensors = None
|
||||
self._transaction_manager = self.config['transaction_manager']
|
||||
# A per-partition queue of batches ordered by creation time for tracking the in-flight batches
|
||||
self._in_flight_batches = collections.defaultdict(list)
|
||||
|
||||
def _maybe_remove_from_inflight_batches(self, batch):
|
||||
try:
|
||||
queue = self._in_flight_batches[batch.topic_partition]
|
||||
except KeyError:
|
||||
return
|
||||
try:
|
||||
idx = queue.index((batch.created, batch))
|
||||
except ValueError:
|
||||
return
|
||||
# https://stackoverflow.com/questions/10162679/python-delete-element-from-heap
|
||||
queue[idx] = queue[-1]
|
||||
queue.pop()
|
||||
heapq.heapify(queue)
|
||||
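The swap-with-last-then-heapify trick referenced above (via the Stack Overflow link) removes an arbitrary element from a heap without deleting from the middle of the list; in isolation, with an illustrative helper name:

import heapq

def heap_remove(heap, idx):
    """Remove heap[idx] in O(n): swap in the last element, shrink, re-heapify."""
    heap[idx] = heap[-1]
    heap.pop()
    heapq.heapify(heap)  # restore the heap invariant after the swap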
|
||||
def _get_expired_inflight_batches(self, now=None):
"""Get the in-flight batches that have reached the delivery timeout."""
expired_batches = []
|
||||
to_remove = []
|
||||
for tp, queue in six.iteritems(self._in_flight_batches):
|
||||
while queue:
|
||||
_created_at, batch = queue[0]
|
||||
if batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms):
|
||||
heapq.heappop(queue)
|
||||
if batch.final_state is None:
|
||||
expired_batches.append(batch)
|
||||
else:
|
||||
raise Errors.IllegalStateError("%s batch created at %s gets unexpected final state %s" % (batch.topic_partition, batch.created, batch.final_state))
|
||||
else:
|
||||
self._accumulator.maybe_update_next_batch_expiry_time(batch)
|
||||
break
|
||||
else:
|
||||
# Avoid mutating in_flight_batches during iteration
|
||||
to_remove.append(tp)
|
||||
for tp in to_remove:
|
||||
del self._in_flight_batches[tp]
|
||||
return expired_batches
|
||||
self._sensors = SenderMetrics(metrics, self._client, self._metadata)
|
||||
|
||||
def run(self):
|
||||
"""The main run loop for the sender thread."""
|
||||
log.debug("%s: Starting Kafka producer I/O thread.", str(self))
|
||||
log.debug("Starting Kafka producer I/O thread.")
|
||||
|
||||
# main loop, runs until close is called
|
||||
while self._running:
|
||||
try:
|
||||
self.run_once()
|
||||
except Exception:
|
||||
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
|
||||
log.exception("Uncaught error in kafka producer I/O thread")
|
||||
|
||||
log.debug("%s: Beginning shutdown of Kafka producer I/O thread, sending"
|
||||
" remaining records.", str(self))
|
||||
log.debug("Beginning shutdown of Kafka producer I/O thread, sending"
|
||||
" remaining records.")
|
||||
|
||||
# okay we stopped accepting requests but there may still be
|
||||
# requests in the accumulator or waiting for acknowledgment,
|
||||
# wait until these are completed.
|
||||
while (not self._force_close
|
||||
and (self._accumulator.has_undrained()
|
||||
and (self._accumulator.has_unsent()
|
||||
or self._client.in_flight_request_count() > 0)):
|
||||
try:
|
||||
self.run_once()
|
||||
except Exception:
|
||||
log.exception("%s: Uncaught error in kafka producer I/O thread", str(self))
|
||||
log.exception("Uncaught error in kafka producer I/O thread")
|
||||
|
||||
if self._force_close:
|
||||
# We need to fail all the incomplete batches and wake up the
|
||||
@@ -133,75 +83,38 @@ class Sender(threading.Thread):
|
||||
try:
|
||||
self._client.close()
|
||||
except Exception:
|
||||
log.exception("%s: Failed to close network client", str(self))
|
||||
log.exception("Failed to close network client")
|
||||
|
||||
log.debug("%s: Shutdown of Kafka producer I/O thread has completed.", str(self))
|
||||
log.debug("Shutdown of Kafka producer I/O thread has completed.")
|
||||
|
||||
def run_once(self):
|
||||
"""Run a single iteration of sending."""
|
||||
while self._topics_to_add:
|
||||
self._client.add_topic(self._topics_to_add.pop())
|
||||
|
||||
if self._transaction_manager:
|
||||
try:
|
||||
if not self._transaction_manager.is_transactional():
|
||||
# this is an idempotent producer, so make sure we have a producer id
|
||||
self._maybe_wait_for_producer_id()
|
||||
elif self._transaction_manager.has_in_flight_transactional_request() or self._maybe_send_transactional_request():
|
||||
# as long as there are outstanding transactional requests, we simply wait for them to return
|
||||
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
|
||||
return
|
||||
|
||||
# do not continue sending if the transaction manager is in a failed state or if there
|
||||
# is no producer id (for the idempotent case).
|
||||
if self._transaction_manager.has_fatal_error() or not self._transaction_manager.has_producer_id():
|
||||
last_error = self._transaction_manager.last_error
|
||||
if last_error is not None:
|
||||
self._maybe_abort_batches(last_error)
|
||||
self._client.poll(timeout_ms=self.config['retry_backoff_ms'])
|
||||
return
|
||||
elif self._transaction_manager.has_abortable_error():
|
||||
self._accumulator.abort_undrained_batches(self._transaction_manager.last_error)
|
||||
|
||||
except Errors.SaslAuthenticationFailedError as e:
|
||||
# This is already logged as error, but propagated here to perform any clean ups.
|
||||
log.debug("%s: Authentication exception while processing transactional request: %s", str(self), e)
|
||||
self._transaction_manager.authentication_failed(e)
|
||||
|
||||
poll_timeout_ms = self._send_producer_data()
|
||||
self._client.poll(timeout_ms=poll_timeout_ms)
|
||||
|
||||
def _send_producer_data(self, now=None):
|
||||
now = time.time() if now is None else now
|
||||
# get the list of partitions with data ready to send
|
||||
result = self._accumulator.ready(self._metadata, now=now)
|
||||
result = self._accumulator.ready(self._metadata)
|
||||
ready_nodes, next_ready_check_delay, unknown_leaders_exist = result
|
||||
|
||||
# if there are any partitions whose leaders are not known yet, force
|
||||
# metadata update
|
||||
if unknown_leaders_exist:
|
||||
log.debug('%s: Unknown leaders exist, requesting metadata update', str(self))
|
||||
log.debug('Unknown leaders exist, requesting metadata update')
|
||||
self._metadata.request_update()
|
||||
|
||||
# remove any nodes we aren't ready to send to
|
||||
not_ready_timeout_ms = float('inf')
|
||||
not_ready_timeout = float('inf')
|
||||
for node in list(ready_nodes):
|
||||
if not self._client.is_ready(node):
|
||||
node_delay_ms = self._client.connection_delay(node)
|
||||
log.debug('%s: Node %s not ready; delaying produce of accumulated batch (%f ms)', str(self), node, node_delay_ms)
|
||||
log.debug('Node %s not ready; delaying produce of accumulated batch', node)
|
||||
self._client.maybe_connect(node, wakeup=False)
|
||||
ready_nodes.remove(node)
|
||||
not_ready_timeout_ms = min(not_ready_timeout_ms, node_delay_ms)
|
||||
not_ready_timeout = min(not_ready_timeout,
|
||||
self._client.connection_delay(node))
|
||||
|
||||
# create produce requests
|
||||
batches_by_node = self._accumulator.drain(
|
||||
self._metadata, ready_nodes, self.config['max_request_size'], now=now)
|
||||
|
||||
for batch_list in six.itervalues(batches_by_node):
|
||||
for batch in batch_list:
|
||||
item = (batch.created, batch)
|
||||
queue = self._in_flight_batches[batch.topic_partition]
|
||||
heapq.heappush(queue, item)
|
||||
self._metadata, ready_nodes, self.config['max_request_size'])
|
||||
|
||||
if self.config['guarantee_message_order']:
|
||||
# Mute all the partitions drained
|
||||
@@ -209,130 +122,42 @@ class Sender(threading.Thread):
|
||||
for batch in batch_list:
|
||||
self._accumulator.muted.add(batch.topic_partition)
|
||||
|
||||
self._accumulator.reset_next_batch_expiry_time()
|
||||
expired_batches = self._accumulator.expired_batches(now=now)
|
||||
expired_batches.extend(self._get_expired_inflight_batches(now=now))
|
||||
|
||||
if expired_batches:
|
||||
log.debug("%s: Expired %s batches in accumulator", str(self), len(expired_batches))
|
||||
|
||||
# Reset the producer_id if an expired batch has previously been sent to the broker.
|
||||
# See the documentation of `TransactionState.reset_producer_id` to understand why
|
||||
# we need to reset the producer id here.
|
||||
if self._transaction_manager and any([batch.in_retry() for batch in expired_batches]):
|
||||
needs_transaction_state_reset = True
|
||||
else:
|
||||
needs_transaction_state_reset = False
|
||||
|
||||
expired_batches = self._accumulator.abort_expired_batches(
|
||||
self.config['request_timeout_ms'], self._metadata)
|
||||
for expired_batch in expired_batches:
|
||||
error = Errors.KafkaTimeoutError(
|
||||
"Expiring %d record(s) for %s: %s ms has passed since batch creation" % (
|
||||
expired_batch.record_count, expired_batch.topic_partition,
|
||||
int((time.time() - expired_batch.created) * 1000)))
|
||||
self._fail_batch(expired_batch, error, base_offset=-1)
|
||||
|
||||
if self._sensors:
|
||||
self._sensors.update_produce_request_metrics(batches_by_node)
|
||||
|
||||
if needs_transaction_state_reset:
|
||||
self._transaction_manager.reset_producer_id()
|
||||
return 0
|
||||
self._sensors.record_errors(expired_batch.topic_partition.topic, expired_batch.record_count)
|
||||
|
||||
self._sensors.update_produce_request_metrics(batches_by_node)
|
||||
requests = self._create_produce_requests(batches_by_node)
|
||||
# If we have any nodes that are ready to send + have sendable data,
|
||||
# poll with 0 timeout so this can immediately loop and try sending more
|
||||
# data. Otherwise, the timeout will be the smaller value between next
|
||||
# batch expiry time, and the delay time for checking data availability.
|
||||
# Note that the nodes may have data that isn't yet sendable due to
|
||||
# lingering, backing off, etc. This specifically does not include nodes with
|
||||
# data. Otherwise, the timeout is determined by nodes that have
|
||||
# partitions with data that isn't yet sendable (e.g. lingering, backing
|
||||
# off). Note that this specifically does not include nodes with
|
||||
# sendable data that aren't ready to send since they would cause busy
|
||||
# looping.
|
||||
poll_timeout_ms = min(next_ready_check_delay * 1000,
|
||||
not_ready_timeout_ms,
|
||||
self._accumulator.next_expiry_time_ms - now * 1000)
|
||||
if poll_timeout_ms < 0:
|
||||
poll_timeout_ms = 0
|
||||
|
||||
poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout)
|
||||
if ready_nodes:
|
||||
log.debug("%s: Nodes with data ready to send: %s", str(self), ready_nodes) # trace
|
||||
log.debug("%s: Created %d produce requests: %s", str(self), len(requests), requests) # trace
|
||||
# if some partitions are already ready to be sent, the select time
|
||||
# would be 0; otherwise if some partition already has some data
|
||||
# accumulated but not ready yet, the select time will be the time
|
||||
# difference between now and its linger expiry time; otherwise the
|
||||
# select time will be the time difference between now and the
|
||||
# metadata expiry time
|
||||
log.debug("Nodes with data ready to send: %s", ready_nodes) # trace
|
||||
log.debug("Created %d produce requests: %s", len(requests), requests) # trace
|
||||
poll_timeout_ms = 0
|
||||
|
||||
for node_id, request in six.iteritems(requests):
|
||||
batches = batches_by_node[node_id]
|
||||
log.debug('%s: Sending Produce Request: %r', str(self), request)
|
||||
log.debug('Sending Produce Request: %r', request)
|
||||
(self._client.send(node_id, request, wakeup=False)
|
||||
.add_callback(
|
||||
self._handle_produce_response, node_id, time.time(), batches)
|
||||
.add_errback(
|
||||
self._failed_produce, batches, node_id))
|
||||
return poll_timeout_ms
|
||||
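Put together, the poll timeout chosen by _send_producer_data is the smallest of three deadlines, floored at zero and forced to zero when something is sendable right now; schematically (names illustrative):

def compute_poll_timeout_ms(next_ready_check_s, not_ready_timeout_ms,
                            next_expiry_time_ms, now_s, have_ready_nodes):
    # Smallest of: next readiness re-check, earliest node connection delay,
    # and the next batch delivery-timeout expiry.
    timeout = min(next_ready_check_s * 1000,
                  not_ready_timeout_ms,
                  next_expiry_time_ms - now_s * 1000)
    if have_ready_nodes or timeout < 0:
        # Sendable data now (or an already-passed deadline): don't block.
        timeout = 0
    return timeout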
|
||||
def _maybe_send_transactional_request(self):
|
||||
if self._transaction_manager.is_completing() and self._accumulator.has_incomplete:
|
||||
if self._transaction_manager.is_aborting():
|
||||
self._accumulator.abort_undrained_batches(Errors.KafkaError("Failing batch since transaction was aborted"))
|
||||
# There may still be requests left which are being retried. Since we do not know whether they had
|
||||
# been successfully appended to the broker log, we must resend them until their final status is clear.
|
||||
# If they had been appended and we did not receive the error, then our sequence number would no longer
|
||||
# be correct which would lead to an OutOfSequenceNumberError.
|
||||
if not self._accumulator.flush_in_progress():
|
||||
self._accumulator.begin_flush()
|
||||
|
||||
next_request_handler = self._transaction_manager.next_request_handler(self._accumulator.has_incomplete)
|
||||
if next_request_handler is None:
|
||||
return False
|
||||
|
||||
log.debug("%s: Sending transactional request %s", str(self), next_request_handler.request)
|
||||
while not self._force_close:
|
||||
target_node = None
|
||||
try:
|
||||
if next_request_handler.needs_coordinator():
|
||||
target_node = self._transaction_manager.coordinator(next_request_handler.coordinator_type)
|
||||
if target_node is None:
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
break
|
||||
elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
target_node = None
|
||||
break
|
||||
else:
|
||||
target_node = self._client.least_loaded_node()
|
||||
if target_node is not None and not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']):
|
||||
target_node = None
|
||||
|
||||
if target_node is not None:
|
||||
if next_request_handler.is_retry:
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
txn_correlation_id = self._transaction_manager.next_in_flight_request_correlation_id()
|
||||
future = self._client.send(target_node, next_request_handler.request)
|
||||
future.add_both(next_request_handler.on_complete, txn_correlation_id)
|
||||
return True
|
||||
|
||||
except Exception as e:
log.warning("%s: Got an exception when trying to find a node to send a transactional request to. Going to back off and retry: %s", str(self), e)
if next_request_handler.needs_coordinator():
|
||||
self._transaction_manager.lookup_coordinator_for_request(next_request_handler)
|
||||
break
|
||||
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
self._metadata.request_update()
|
||||
|
||||
if target_node is None:
|
||||
self._transaction_manager.retry(next_request_handler)
|
||||
|
||||
return True
|
||||
|
||||
def _maybe_abort_batches(self, exc):
|
||||
if self._accumulator.has_incomplete:
|
||||
log.error("%s: Aborting producer batches due to fatal error: %s", str(self), exc)
|
||||
self._accumulator.abort_batches(exc)
|
||||
# if some partitions are already ready to be sent, the select time
|
||||
# would be 0; otherwise if some partition already has some data
|
||||
# accumulated but not ready yet, the select time will be the time
|
||||
# difference between now and its linger expiry time; otherwise the
|
||||
# select time will be the time difference between now and the
|
||||
# metadata expiry time
|
||||
self._client.poll(timeout_ms=poll_timeout_ms)
|
||||
|
||||
def initiate_close(self):
|
||||
"""Start closing the sender (won't complete until all data is sent)."""
|
||||
@@ -355,164 +180,82 @@ class Sender(threading.Thread):
|
||||
self._topics_to_add.add(topic)
|
||||
self.wakeup()
|
||||
|
||||
def _maybe_wait_for_producer_id(self):
|
||||
while not self._transaction_manager.has_producer_id():
|
||||
try:
|
||||
node_id = self._client.least_loaded_node()
|
||||
if node_id is None or not self._client.await_ready(node_id):
|
||||
log.debug("%s, Could not find an available broker to send InitProducerIdRequest to." +
|
||||
" Will back off and try again.", str(self))
|
||||
time.sleep(self._client.least_loaded_node_refresh_ms() / 1000)
|
||||
continue
|
||||
version = self._client.api_version(InitProducerIdRequest, max_version=1)
|
||||
request = InitProducerIdRequest[version](
|
||||
transactional_id=self.config['transactional_id'],
|
||||
transaction_timeout_ms=self.config['transaction_timeout_ms'],
|
||||
)
|
||||
response = self._client.send_and_receive(node_id, request)
|
||||
error_type = Errors.for_code(response.error_code)
|
||||
if error_type is Errors.NoError:
|
||||
self._transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch))
|
||||
break
|
||||
elif getattr(error_type, 'retriable', False):
|
||||
log.debug("%s: Retriable error from InitProducerId response: %s", str(self), error_type.__name__)
|
||||
if getattr(error_type, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
else:
|
||||
self._transaction_manager.transition_to_fatal_error(error_type())
|
||||
break
|
||||
except Errors.KafkaConnectionError:
|
||||
log.debug("%s: Broker %s disconnected while awaiting InitProducerId response", str(self), node_id)
|
||||
except Errors.RequestTimedOutError:
|
||||
log.debug("%s: InitProducerId request to node %s timed out", str(self), node_id)
|
||||
log.debug("%s: Retry InitProducerIdRequest in %sms.", str(self), self.config['retry_backoff_ms'])
|
||||
time.sleep(self.config['retry_backoff_ms'] / 1000)
|
||||
|
||||
def _failed_produce(self, batches, node_id, error):
|
||||
log.error("%s: Error sending produce request to node %d: %s", str(self), node_id, error) # trace
|
||||
log.debug("Error sending produce request to node %d: %s", node_id, error) # trace
|
||||
for batch in batches:
|
||||
self._complete_batch(batch, error, -1)
|
||||
self._complete_batch(batch, error, -1, None)
|
||||
|
||||
def _handle_produce_response(self, node_id, send_time, batches, response):
|
||||
"""Handle a produce response."""
|
||||
# if we have a response, parse it
|
||||
log.debug('%s: Parsing produce response: %r', str(self), response)
|
||||
log.debug('Parsing produce response: %r', response)
|
||||
if response:
|
||||
batches_by_partition = dict([(batch.topic_partition, batch)
|
||||
for batch in batches])
|
||||
|
||||
for topic, partitions in response.topics:
|
||||
for partition_info in partitions:
|
||||
global_error = None
|
||||
log_start_offset = None
|
||||
if response.API_VERSION < 2:
|
||||
partition, error_code, offset = partition_info
|
||||
ts = None
|
||||
elif 2 <= response.API_VERSION <= 4:
|
||||
partition, error_code, offset, ts = partition_info
|
||||
elif 5 <= response.API_VERSION <= 7:
|
||||
partition, error_code, offset, ts, _log_start_offset = partition_info
|
||||
partition, error_code, offset, ts, log_start_offset = partition_info
|
||||
else:
|
||||
# Currently unused / TODO: KIP-467
|
||||
partition, error_code, offset, ts, _log_start_offset, _record_errors, _global_error = partition_info
|
||||
# the ignored parameter is record_error of type list[(batch_index: int, error_message: str)]
|
||||
partition, error_code, offset, ts, log_start_offset, _, global_error = partition_info
|
||||
tp = TopicPartition(topic, partition)
|
||||
error = Errors.for_code(error_code)
|
||||
batch = batches_by_partition[tp]
|
||||
self._complete_batch(batch, error, offset, timestamp_ms=ts)
|
||||
self._complete_batch(batch, error, offset, ts, log_start_offset, global_error)
|
||||
|
||||
if response.API_VERSION > 0:
|
||||
self._sensors.record_throttle_time(response.throttle_time_ms, node=node_id)
|
||||
|
||||
else:
|
||||
# this is the acks = 0 case, just complete all requests
|
||||
for batch in batches:
|
||||
self._complete_batch(batch, None, -1)
|
||||
self._complete_batch(batch, None, -1, None)
|
||||
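The partition_info tuple parsed above grows with the produce API version; a compact way to see the shapes side by side (a sketch, field names illustrative, versions per the branches above):

def parse_partition_info(api_version, partition_info):
    """Unpack a produce-response partition entry into a uniform dict."""
    ts = log_start_offset = global_error = None
    if api_version < 2:
        partition, error_code, offset = partition_info
    elif 2 <= api_version <= 4:
        partition, error_code, offset, ts = partition_info
    elif 5 <= api_version <= 7:
        partition, error_code, offset, ts, log_start_offset = partition_info
    else:
        # v8+: adds per-record errors and a summary error message (KIP-467)
        (partition, error_code, offset, ts,
         log_start_offset, _record_errors, global_error) = partition_info
    return dict(partition=partition, error_code=error_code, offset=offset,
                timestamp=ts, log_start_offset=log_start_offset,
                global_error=global_error)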
|
||||
def _fail_batch(self, batch, exception, base_offset=None, timestamp_ms=None):
|
||||
exception = exception if type(exception) is not type else exception()
|
||||
if self._transaction_manager:
|
||||
if isinstance(exception, Errors.OutOfOrderSequenceNumberError) and \
|
||||
not self._transaction_manager.is_transactional() and \
|
||||
self._transaction_manager.has_producer_id(batch.producer_id):
|
||||
log.error("%s: The broker received an out of order sequence number for topic-partition %s"
|
||||
" at offset %s. This indicates data loss on the broker, and should be investigated.",
|
||||
str(self), batch.topic_partition, base_offset)
|
||||
|
||||
# Reset the transaction state since we have hit an irrecoverable exception and cannot make any guarantees
|
||||
# about the previously committed message. Note that this will discard the producer id and sequence
|
||||
# numbers for all existing partitions.
|
||||
self._transaction_manager.reset_producer_id()
|
||||
elif isinstance(exception, (Errors.ClusterAuthorizationFailedError,
|
||||
Errors.TransactionalIdAuthorizationFailedError,
|
||||
Errors.ProducerFencedError,
|
||||
Errors.InvalidTxnStateError)):
|
||||
self._transaction_manager.transition_to_fatal_error(exception)
|
||||
elif self._transaction_manager.is_transactional():
|
||||
self._transaction_manager.transition_to_abortable_error(exception)
|
||||
|
||||
if self._sensors:
|
||||
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
|
||||
|
||||
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms, exception=exception):
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
self._accumulator.deallocate(batch)
|
||||
|
||||
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None):
|
||||
def _complete_batch(self, batch, error, base_offset, timestamp_ms=None, log_start_offset=None, global_error=None):
|
||||
"""Complete or retry the given batch of records.
|
||||
|
||||
Arguments:
|
||||
batch (ProducerBatch): The record batch
|
||||
batch (RecordBatch): The record batch
|
||||
error (Exception): The error (or None if none)
|
||||
base_offset (int): The base offset assigned to the records if successful
|
||||
timestamp_ms (int, optional): The timestamp returned by the broker for this batch
|
||||
log_start_offset (int): The start offset of the log at the time this produce response was created
|
||||
global_error (str): The summarising error message
|
||||
"""
|
||||
# Standardize no-error to None
|
||||
if error is Errors.NoError:
|
||||
error = None
|
||||
|
||||
if error is not None:
|
||||
if self._can_retry(batch, error):
|
||||
# retry
|
||||
log.warning("%s: Got error produce response on topic-partition %s,"
|
||||
" retrying (%s attempts left). Error: %s",
|
||||
str(self), batch.topic_partition,
|
||||
self.config['retries'] - batch.attempts - 1,
|
||||
error)
|
||||
|
||||
# If idempotence is enabled only retry the request if the batch matches our current producer id and epoch
|
||||
if not self._transaction_manager or self._transaction_manager.producer_id_and_epoch.match(batch):
|
||||
log.debug("%s: Retrying batch to topic-partition %s. Sequence number: %s",
|
||||
str(self), batch.topic_partition,
|
||||
self._transaction_manager.sequence_number(batch.topic_partition) if self._transaction_manager else None)
|
||||
self._accumulator.reenqueue(batch)
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
if self._sensors:
|
||||
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
|
||||
else:
|
||||
log.warning("%s: Attempted to retry sending a batch but the producer id/epoch changed from %s/%s to %s/%s. This batch will be dropped",
|
||||
str(self), batch.producer_id, batch.producer_epoch,
|
||||
self._transaction_manager.producer_id_and_epoch.producer_id,
|
||||
self._transaction_manager.producer_id_and_epoch.epoch)
|
||||
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
|
||||
else:
|
||||
if error is Errors.TopicAuthorizationFailedError:
|
||||
error = error(batch.topic_partition.topic)
|
||||
|
||||
# tell the user the result of their request
|
||||
self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms)
|
||||
|
||||
if error is Errors.UnknownTopicOrPartitionError:
|
||||
log.warning("%s: Received unknown topic or partition error in produce request on partition %s."
|
||||
" The topic/partition may not exist or the user may not have Describe access to it",
|
||||
str(self), batch.topic_partition)
|
||||
|
||||
if getattr(error, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
|
||||
if error is not None and self._can_retry(batch, error):
|
||||
# retry
|
||||
log.warning("Got error produce response on topic-partition %s,"
|
||||
" retrying (%d attempts left). Error: %s",
|
||||
batch.topic_partition,
|
||||
self.config['retries'] - batch.attempts - 1,
|
||||
global_error or error)
|
||||
self._accumulator.reenqueue(batch)
|
||||
self._sensors.record_retries(batch.topic_partition.topic, batch.record_count)
|
||||
else:
|
||||
if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms):
|
||||
self._maybe_remove_from_inflight_batches(batch)
|
||||
self._accumulator.deallocate(batch)
|
||||
if error is Errors.TopicAuthorizationFailedError:
|
||||
error = error(batch.topic_partition.topic)
|
||||
|
||||
if self._transaction_manager and self._transaction_manager.producer_id_and_epoch.match(batch):
|
||||
self._transaction_manager.increment_sequence_number(batch.topic_partition, batch.record_count)
|
||||
log.debug("%s: Incremented sequence number for topic-partition %s to %s", str(self), batch.topic_partition,
|
||||
self._transaction_manager.sequence_number(batch.topic_partition))
|
||||
# tell the user the result of their request
|
||||
batch.done(base_offset, timestamp_ms, error, log_start_offset, global_error)
|
||||
self._accumulator.deallocate(batch)
|
||||
if error is not None:
|
||||
self._sensors.record_errors(batch.topic_partition.topic, batch.record_count)
|
||||
|
||||
if getattr(error, 'invalid_metadata', False):
|
||||
self._metadata.request_update()
|
||||
|
||||
# Unmute the completed partition.
|
||||
if self.config['guarantee_message_order']:
|
||||
@@ -523,10 +266,8 @@ class Sender(threading.Thread):
|
||||
We can retry a send if the error is transient and the number of
|
||||
attempts taken is fewer than the maximum allowed
|
||||
"""
|
||||
return (not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms) and
|
||||
batch.attempts < self.config['retries'] and
|
||||
batch.final_state is None and
|
||||
getattr(error, 'retriable', False))
|
||||
return (batch.attempts < self.config['retries']
|
||||
and getattr(error, 'retriable', False))
|
||||
|
||||
def _create_produce_requests(self, collated):
|
||||
"""
|
||||
@@ -534,24 +275,23 @@ class Sender(threading.Thread):
|
||||
per-node basis.
|
||||
|
||||
Arguments:
|
||||
collated: {node_id: [ProducerBatch]}
|
||||
collated: {node_id: [RecordBatch]}
|
||||
|
||||
Returns:
|
||||
dict: {node_id: ProduceRequest} (version depends on client api_versions)
|
||||
dict: {node_id: ProduceRequest} (version depends on api_version)
|
||||
"""
|
||||
requests = {}
|
||||
for node_id, batches in six.iteritems(collated):
|
||||
if batches:
|
||||
requests[node_id] = self._produce_request(
|
||||
node_id, self.config['acks'],
|
||||
self.config['request_timeout_ms'], batches)
|
||||
requests[node_id] = self._produce_request(
|
||||
node_id, self.config['acks'],
|
||||
self.config['request_timeout_ms'], batches)
|
||||
return requests
|
||||
|
||||
def _produce_request(self, node_id, acks, timeout, batches):
|
||||
"""Create a produce request from the given record batches.
|
||||
|
||||
Returns:
|
||||
ProduceRequest (version depends on client api_versions)
|
||||
ProduceRequest (version depends on api_version)
|
||||
"""
|
||||
produce_records_by_partition = collections.defaultdict(dict)
|
||||
for batch in batches:
|
||||
@@ -561,26 +301,32 @@ class Sender(threading.Thread):
|
||||
buf = batch.records.buffer()
|
||||
produce_records_by_partition[topic][partition] = buf
|
||||
|
||||
version = self._client.api_version(ProduceRequest, max_version=7)
|
||||
topic_partition_data = [
|
||||
(topic, list(partition_info.items()))
|
||||
for topic, partition_info in six.iteritems(produce_records_by_partition)]
|
||||
transactional_id = self._transaction_manager.transactional_id if self._transaction_manager else None
|
||||
if version >= 3:
|
||||
return ProduceRequest[version](
|
||||
transactional_id=transactional_id,
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=topic_partition_data,
|
||||
)
|
||||
kwargs = {}
|
||||
if self.config['api_version'] >= (2, 1):
|
||||
version = 7
|
||||
elif self.config['api_version'] >= (2, 0):
|
||||
version = 6
|
||||
elif self.config['api_version'] >= (1, 1):
|
||||
version = 5
|
||||
elif self.config['api_version'] >= (1, 0):
|
||||
version = 4
|
||||
elif self.config['api_version'] >= (0, 11):
|
||||
version = 3
|
||||
kwargs = dict(transactional_id=None)
|
||||
elif self.config['api_version'] >= (0, 10):
|
||||
version = 2
|
||||
elif self.config['api_version'] == (0, 9):
|
||||
version = 1
|
||||
else:
|
||||
if transactional_id is not None:
|
||||
log.warning('%s: Broker does not support ProduceRequest v3+, required for transactional_id', str(self))
|
||||
return ProduceRequest[version](
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=topic_partition_data,
|
||||
)
|
||||
version = 0
|
||||
return ProduceRequest[version](
|
||||
required_acks=acks,
|
||||
timeout=timeout,
|
||||
topics=[(topic, list(partition_info.items()))
|
||||
for topic, partition_info
|
||||
in six.iteritems(produce_records_by_partition)],
|
||||
**kwargs
|
||||
)
|
||||
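The broker-version-to-request-version ladder above can be summarized as a lookup table; a sketch of roughly the same mapping (note the code above uses exact equality for (0, 9)):

def produce_request_version(api_version):
    """Map a broker api_version tuple to a ProduceRequest version."""
    thresholds = [
        ((2, 1), 7),
        ((2, 0), 6),
        ((1, 1), 5),
        ((1, 0), 4),
        ((0, 11), 3),  # v3+ also carries transactional_id
        ((0, 10), 2),
        ((0, 9), 1),   # the branch above matches (0, 9) exactly
    ]
    for min_version, request_version in thresholds:
        if api_version >= min_version:
            return request_version
    return 0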
|
||||
def wakeup(self):
|
||||
"""Wake up the selector associated with this send thread."""
|
||||
@@ -589,9 +335,6 @@ class Sender(threading.Thread):
|
||||
def bootstrap_connected(self):
|
||||
return self._client.bootstrap_connected()
|
||||
|
||||
def __str__(self):
|
||||
return "<Sender client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id'])
|
||||
|
||||
|
||||
class SenderMetrics(object):
|
||||
|
||||
@@ -624,6 +367,15 @@ class SenderMetrics(object):
|
||||
sensor_name=sensor_name,
|
||||
description='The maximum time in ms record batches spent in the record accumulator.')
|
||||
|
||||
sensor_name = 'produce-throttle-time'
|
||||
self.produce_throttle_time_sensor = self.metrics.sensor(sensor_name)
|
||||
self.add_metric('produce-throttle-time-avg', Avg(),
|
||||
sensor_name=sensor_name,
|
||||
description='The average throttle time in ms')
|
||||
self.add_metric('produce-throttle-time-max', Max(),
|
||||
sensor_name=sensor_name,
|
||||
description='The maximum throttle time in ms')
|
||||
|
||||
sensor_name = 'records-per-request'
|
||||
self.records_per_request_sensor = self.metrics.sensor(sensor_name)
|
||||
self.add_metric('record-send-rate', Rate(),
|
||||
@@ -746,9 +498,8 @@ class SenderMetrics(object):
|
||||
records += batch.record_count
|
||||
total_bytes += batch.records.size_in_bytes()
|
||||
|
||||
if node_batch:
|
||||
self.records_per_request_sensor.record(records)
|
||||
self.byte_rate_sensor.record(total_bytes)
|
||||
self.records_per_request_sensor.record(records)
|
||||
self.byte_rate_sensor.record(total_bytes)
|
||||
|
||||
def record_retries(self, topic, count):
|
||||
self.retry_sensor.record(count)
|
||||
@@ -761,3 +512,6 @@ class SenderMetrics(object):
|
||||
sensor = self.metrics.get_sensor('topic.' + topic + '.record-errors')
|
||||
if sensor:
|
||||
sensor.record(count)
|
||||
|
||||
def record_throttle_time(self, throttle_time_ms, node=None):
|
||||
self.produce_throttle_time_sensor.record(throttle_time_ms)
|
||||
|
||||
@@ -1,981 +0,0 @@
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
import abc
|
||||
import collections
|
||||
import heapq
|
||||
import logging
|
||||
import threading
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
try:
|
||||
# enum in stdlib as of py3.4
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.protocol.add_offsets_to_txn import AddOffsetsToTxnRequest
|
||||
from kafka.protocol.add_partitions_to_txn import AddPartitionsToTxnRequest
|
||||
from kafka.protocol.end_txn import EndTxnRequest
|
||||
from kafka.protocol.find_coordinator import FindCoordinatorRequest
|
||||
from kafka.protocol.init_producer_id import InitProducerIdRequest
|
||||
from kafka.protocol.txn_offset_commit import TxnOffsetCommitRequest
|
||||
from kafka.structs import TopicPartition
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
NO_PRODUCER_ID = -1
|
||||
NO_PRODUCER_EPOCH = -1
|
||||
NO_SEQUENCE = -1
|
||||
|
||||
|
||||
class ProducerIdAndEpoch(object):
|
||||
__slots__ = ('producer_id', 'epoch')
|
||||
|
||||
def __init__(self, producer_id, epoch):
|
||||
self.producer_id = producer_id
|
||||
self.epoch = epoch
|
||||
|
||||
@property
|
||||
def is_valid(self):
|
||||
return NO_PRODUCER_ID < self.producer_id
|
||||
|
||||
def match(self, batch):
|
||||
return self.producer_id == batch.producer_id and self.epoch == batch.producer_epoch
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, ProducerIdAndEpoch) and self.producer_id == other.producer_id and self.epoch == other.epoch
|
||||
|
||||
def __str__(self):
|
||||
return "ProducerIdAndEpoch(producer_id={}, epoch={})".format(self.producer_id, self.epoch)
|
||||
|
||||
|
||||
class TransactionState(IntEnum):
|
||||
UNINITIALIZED = 0
|
||||
INITIALIZING = 1
|
||||
READY = 2
|
||||
IN_TRANSACTION = 3
|
||||
COMMITTING_TRANSACTION = 4
|
||||
ABORTING_TRANSACTION = 5
|
||||
ABORTABLE_ERROR = 6
|
||||
FATAL_ERROR = 7
|
||||
|
||||
@classmethod
|
||||
def is_transition_valid(cls, source, target):
|
||||
if target == cls.INITIALIZING:
|
||||
return source == cls.UNINITIALIZED
|
||||
elif target == cls.READY:
|
||||
return source in (cls.INITIALIZING, cls.COMMITTING_TRANSACTION, cls.ABORTING_TRANSACTION)
|
||||
elif target == cls.IN_TRANSACTION:
|
||||
return source == cls.READY
|
||||
elif target == cls.COMMITTING_TRANSACTION:
|
||||
return source == cls.IN_TRANSACTION
|
||||
elif target == cls.ABORTING_TRANSACTION:
|
||||
return source in (cls.IN_TRANSACTION, cls.ABORTABLE_ERROR)
|
||||
elif target == cls.ABORTABLE_ERROR:
|
||||
return source in (cls.IN_TRANSACTION, cls.COMMITTING_TRANSACTION, cls.ABORTABLE_ERROR)
|
||||
elif target == cls.UNINITIALIZED:
# Disallow transitions to UNINITIALIZED
return False
|
||||
elif target == cls.FATAL_ERROR:
|
||||
# We can transition to FATAL_ERROR unconditionally.
|
||||
# FATAL_ERROR is never a valid starting state for any transition. So the only option is to close the
|
||||
# producer or do purely non transactional requests.
|
||||
return True
|
||||
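A quick usage sketch of the transition table above (a hypothetical driver for illustration, not part of the diff):

def transition(current, target):
    """Advance a transaction state machine, enforcing the table above."""
    if not TransactionState.is_transition_valid(current, target):
        raise ValueError("Invalid state transition %s -> %s"
                         % (current.name, target.name))
    return target

state = TransactionState.UNINITIALIZED
state = transition(state, TransactionState.INITIALIZING)
state = transition(state, TransactionState.READY)
state = transition(state, TransactionState.IN_TRANSACTION)
state = transition(state, TransactionState.COMMITTING_TRANSACTION)
state = transition(state, TransactionState.READY)  # commit completed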
|
||||
|
||||
class Priority(IntEnum):
|
||||
# We use the priority to determine the order in which requests need to be sent out. For instance, if we have
# a pending FindCoordinator request, that must always go first. Next, if we need a producer id, that must go second.
# The endTxn request must always go last.
|
||||
FIND_COORDINATOR = 0
|
||||
INIT_PRODUCER_ID = 1
|
||||
ADD_PARTITIONS_OR_OFFSETS = 2
|
||||
END_TXN = 3
|
||||
|
||||
|
||||
class TransactionManager(object):
|
||||
"""
|
||||
A class which maintains state for transactions. Also keeps the state necessary to ensure idempotent production.
|
||||
"""
|
||||
NO_INFLIGHT_REQUEST_CORRELATION_ID = -1
|
||||
# The retry_backoff_ms is overridden to the following value if the first AddPartitions receives a
|
||||
# CONCURRENT_TRANSACTIONS error.
|
||||
ADD_PARTITIONS_RETRY_BACKOFF_MS = 20
|
||||
|
||||
def __init__(self, transactional_id=None, transaction_timeout_ms=0, retry_backoff_ms=100, api_version=(0, 11), metadata=None):
|
||||
self._api_version = api_version
|
||||
self._metadata = metadata
|
||||
|
||||
self._sequence_numbers = collections.defaultdict(lambda: 0)
|
||||
|
||||
self.transactional_id = transactional_id
|
||||
self.transaction_timeout_ms = transaction_timeout_ms
|
||||
self._transaction_coordinator = None
|
||||
self._consumer_group_coordinator = None
|
||||
self._new_partitions_in_transaction = set()
|
||||
self._pending_partitions_in_transaction = set()
|
||||
self._partitions_in_transaction = set()
|
||||
self._pending_txn_offset_commits = dict()
|
||||
|
||||
self._current_state = TransactionState.UNINITIALIZED
|
||||
self._last_error = None
|
||||
self.producer_id_and_epoch = ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)
|
||||
|
||||
self._transaction_started = False
|
||||
|
||||
self._pending_requests = [] # priority queue via heapq
|
||||
self._pending_requests_sort_id = 0
|
||||
self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
# This is used by the TxnRequestHandlers to control how long to back off before a given request is retried.
|
||||
# For instance, this value is lowered by the AddPartitionsToTxnHandler when it receives a CONCURRENT_TRANSACTIONS
|
||||
# error for the first AddPartitionsRequest in a transaction.
|
||||
self.retry_backoff_ms = retry_backoff_ms
|
||||
self._lock = threading.Condition()
|
||||
|
||||
def initialize_transactions(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._transition_to(TransactionState.INITIALIZING)
|
||||
self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH))
|
||||
self._sequence_numbers.clear()
|
||||
handler = InitProducerIdHandler(self, self.transaction_timeout_ms)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
|
||||
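For orientation, the user-facing flow that ultimately drives initialize_transactions, begin_transaction, and begin_commit looks roughly like this. This is a hedged sketch: it assumes the KafkaProducer transactional methods introduced alongside this manager, and the broker address and topic are illustrative:

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',   # illustrative
    transactional_id='my-transactional-producer',
)
producer.init_transactions()       # -> TransactionManager.initialize_transactions()
producer.begin_transaction()       # -> begin_transaction()
try:
    producer.send('my-topic', b'all-or-nothing record')
    producer.commit_transaction()  # -> begin_commit() and the EndTxn handler
except Exception:
    producer.abort_transaction()   # -> begin_abort()
    raise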
|
||||
def begin_transaction(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.IN_TRANSACTION)
|
||||
|
||||
def begin_commit(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.COMMITTING_TRANSACTION)
|
||||
return self._begin_completing_transaction(True)
|
||||
|
||||
def begin_abort(self):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
if self._current_state != TransactionState.ABORTABLE_ERROR:
|
||||
self._maybe_fail_with_error()
|
||||
self._transition_to(TransactionState.ABORTING_TRANSACTION)
|
||||
|
||||
# We're aborting the transaction, so there should be no need to add new partitions
|
||||
self._new_partitions_in_transaction.clear()
|
||||
return self._begin_completing_transaction(False)
|
||||
|
||||
def _begin_completing_transaction(self, committed):
|
||||
if self._new_partitions_in_transaction:
|
||||
self._enqueue_request(self._add_partitions_to_transaction_handler())
|
||||
handler = EndTxnHandler(self, committed)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
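# Illustrative sketch (not from the kafka-python source): the call order the manager expects
# from a transactional producer. The sender/network loop that actually drains the enqueued
# handlers via next_request_handler() is assumed to be running and is omitted; the
# transactional_id and timeout values are hypothetical.
#
#   manager = TransactionManager(transactional_id='my-txn-id', transaction_timeout_ms=60000)
#   manager.initialize_transactions().wait()          # InitProducerId completes the result
#   manager.begin_transaction()
#   manager.maybe_add_partition_to_transaction(tp)    # queued for the next AddPartitionsToTxn
#   manager.begin_commit().wait()                     # AddPartitions (if pending), then EndTxn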
|
||||
|
||||
def send_offsets_to_transaction(self, offsets, consumer_group_id):
|
||||
with self._lock:
|
||||
self._ensure_transactional()
|
||||
self._maybe_fail_with_error()
|
||||
if self._current_state != TransactionState.IN_TRANSACTION:
|
||||
raise Errors.KafkaError("Cannot send offsets to transaction because the producer is not in an active transaction")
|
||||
|
||||
log.debug("Begin adding offsets %s for consumer group %s to transaction", offsets, consumer_group_id)
|
||||
handler = AddOffsetsToTxnHandler(self, consumer_group_id, offsets)
|
||||
self._enqueue_request(handler)
|
||||
return handler.result
|
||||
|
||||
def maybe_add_partition_to_transaction(self, topic_partition):
|
||||
with self._lock:
|
||||
self._fail_if_not_ready_for_send()
|
||||
|
||||
if self.is_partition_added(topic_partition) or self.is_partition_pending_add(topic_partition):
|
||||
return
|
||||
|
||||
log.debug("Begin adding new partition %s to transaction", topic_partition)
|
||||
self._new_partitions_in_transaction.add(topic_partition)
|
||||
|
||||
def _fail_if_not_ready_for_send(self):
|
||||
with self._lock:
|
||||
if self.has_error():
|
||||
raise Errors.KafkaError(
|
||||
"Cannot perform send because at least one previous transactional or"
|
||||
" idempotent request has failed with errors.", self._last_error)
|
||||
|
||||
if self.is_transactional():
|
||||
if not self.has_producer_id():
|
||||
raise Errors.IllegalStateError(
|
||||
"Cannot perform a 'send' before completing a call to init_transactions"
|
||||
" when transactions are enabled.")
|
||||
|
||||
if self._current_state != TransactionState.IN_TRANSACTION:
|
||||
raise Errors.IllegalStateError("Cannot call send in state %s" % (self._current_state.name,))
|
||||
|
||||
def is_send_to_partition_allowed(self, tp):
|
||||
with self._lock:
|
||||
if self.has_fatal_error():
|
||||
return False
|
||||
return not self.is_transactional() or tp in self._partitions_in_transaction
|
||||
|
||||
def has_producer_id(self, producer_id=None):
|
||||
if producer_id is None:
|
||||
return self.producer_id_and_epoch.is_valid
|
||||
else:
|
||||
return self.producer_id_and_epoch.producer_id == producer_id
|
||||
|
||||
def is_transactional(self):
|
||||
return self.transactional_id is not None
|
||||
|
||||
def has_partitions_to_add(self):
|
||||
with self._lock:
|
||||
return bool(self._new_partitions_in_transaction) or bool(self._pending_partitions_in_transaction)
|
||||
|
||||
def is_completing(self):
|
||||
with self._lock:
|
||||
return self._current_state in (
|
||||
TransactionState.COMMITTING_TRANSACTION,
|
||||
TransactionState.ABORTING_TRANSACTION)
|
||||
|
||||
@property
|
||||
def last_error(self):
|
||||
return self._last_error
|
||||
|
||||
def has_error(self):
|
||||
with self._lock:
|
||||
return self._current_state in (
|
||||
TransactionState.ABORTABLE_ERROR,
|
||||
TransactionState.FATAL_ERROR)
|
||||
|
||||
def is_aborting(self):
|
||||
with self._lock:
|
||||
return self._current_state == TransactionState.ABORTING_TRANSACTION
|
||||
|
||||
def transition_to_abortable_error(self, exc):
|
||||
with self._lock:
|
||||
if self._current_state == TransactionState.ABORTING_TRANSACTION:
|
||||
log.debug("Skipping transition to abortable error state since the transaction is already being"
|
||||
" aborted. Underlying exception: %s", exc)
|
||||
return
|
||||
self._transition_to(TransactionState.ABORTABLE_ERROR, error=exc)
|
||||
|
||||
def transition_to_fatal_error(self, exc):
|
||||
with self._lock:
|
||||
self._transition_to(TransactionState.FATAL_ERROR, error=exc)
|
||||
|
||||
def is_partition_added(self, partition):
|
||||
with self._lock:
|
||||
return partition in self._partitions_in_transaction
|
||||
|
||||
def is_partition_pending_add(self, partition):
|
||||
return partition in self._new_partitions_in_transaction or partition in self._pending_partitions_in_transaction
|
||||
|
||||
def has_producer_id_and_epoch(self, producer_id, producer_epoch):
|
||||
return (
|
||||
self.producer_id_and_epoch.producer_id == producer_id and
|
||||
self.producer_id_and_epoch.epoch == producer_epoch
|
||||
)
|
||||
|
||||
def set_producer_id_and_epoch(self, producer_id_and_epoch):
|
||||
if not isinstance(producer_id_and_epoch, ProducerIdAndEpoch):
|
||||
raise TypeError("ProducerIdAndEpoch type required")
|
||||
log.info("ProducerId set to %s with epoch %s",
|
||||
producer_id_and_epoch.producer_id, producer_id_and_epoch.epoch)
|
||||
self.producer_id_and_epoch = producer_id_and_epoch
|
||||
|
||||
def reset_producer_id(self):
|
||||
"""
|
||||
This method is used when the producer needs to reset its internal state because of an irrecoverable exception
|
||||
from the broker.
|
||||
|
||||
We need to reset the producer id and associated state when we have sent a batch to the broker, but we either get
|
||||
a non-retriable exception or we run out of retries, or the batch expired in the producer queue after it was already
|
||||
sent to the broker.
|
||||
|
||||
In all of these cases, we don't know whether the batch was actually committed on the broker, and hence whether the
sequence number was actually updated. If we don't reset the producer state, we risk having all future
messages fail with an OutOfOrderSequenceNumberError.
|
||||
|
||||
Note that we can't reset the producer state for the transactional producer as this would mean bumping the epoch
|
||||
for the same producer id. This might involve aborting the ongoing transaction during the initProducerIdRequest,
|
||||
and the user would not have any way of knowing this happened. So for the transactional producer,
|
||||
it's best to return the produce error to the user and let them abort the transaction and close the producer explicitly.
|
||||
"""
|
||||
with self._lock:
|
||||
if self.is_transactional():
|
||||
raise Errors.IllegalStateError(
|
||||
"Cannot reset producer state for a transactional producer."
|
||||
" You must either abort the ongoing transaction or"
|
||||
" reinitialize the transactional producer instead")
|
||||
self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH))
|
||||
self._sequence_numbers.clear()
|
||||
|
||||
def sequence_number(self, tp):
|
||||
with self._lock:
|
||||
return self._sequence_numbers[tp]
|
||||
|
||||
def increment_sequence_number(self, tp, increment):
|
||||
with self._lock:
|
||||
if tp not in self._sequence_numbers:
|
||||
raise Errors.IllegalStateError("Attempt to increment sequence number for a partition with no current sequence.")
|
||||
# Sequence number wraps at the Java max int32 value (2147483647)
|
||||
base = self._sequence_numbers[tp]
|
||||
if base > (2147483647 - increment):
|
||||
self._sequence_numbers[tp] = increment - (2147483647 - base) - 1
|
||||
else:
|
||||
self._sequence_numbers[tp] += increment
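# Illustrative sketch (not from the kafka-python source): the producer sequence number is a
# signed 32-bit value on the wire, so incrementing past 2147483647 wraps back through 0 using
# exactly the formula above.
MAX_INT32 = 2147483647
base, increment = 2147483646, 5
wrapped = increment - (MAX_INT32 - base) - 1   # same arithmetic as increment_sequence_number
assert wrapped == 3   # 2147483646 -> 2147483647 -> 0 -> 1 -> 2 -> 3 after five increments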
|
||||
|
||||
def next_request_handler(self, has_incomplete_batches):
|
||||
with self._lock:
|
||||
if self._new_partitions_in_transaction:
|
||||
self._enqueue_request(self._add_partitions_to_transaction_handler())
|
||||
|
||||
if not self._pending_requests:
|
||||
return None
|
||||
|
||||
_, _, next_request_handler = self._pending_requests[0]
|
||||
# Do not send the EndTxn until all batches have been flushed
|
||||
if isinstance(next_request_handler, EndTxnHandler) and has_incomplete_batches:
|
||||
return None
|
||||
|
||||
heapq.heappop(self._pending_requests)
|
||||
if self._maybe_terminate_request_with_error(next_request_handler):
|
||||
log.debug("Not sending transactional request %s because we are in an error state",
|
||||
next_request_handler.request)
|
||||
return None
|
||||
|
||||
if isinstance(next_request_handler, EndTxnHandler) and not self._transaction_started:
|
||||
next_request_handler.result.done()
|
||||
if self._current_state != TransactionState.FATAL_ERROR:
|
||||
log.debug("Not sending EndTxn for completed transaction since no partitions"
|
||||
" or offsets were successfully added")
|
||||
self._complete_transaction()
|
||||
try:
|
||||
_, _, next_request_handler = heapq.heappop(self._pending_requests)
|
||||
except IndexError:
|
||||
next_request_handler = None
|
||||
|
||||
if next_request_handler:
|
||||
log.debug("Request %s dequeued for sending", next_request_handler.request)
|
||||
|
||||
return next_request_handler
|
||||
|
||||
def retry(self, request):
|
||||
with self._lock:
|
||||
request.set_retry()
|
||||
self._enqueue_request(request)
|
||||
|
||||
def authentication_failed(self, exc):
|
||||
with self._lock:
|
||||
for _, _, request in self._pending_requests:
|
||||
request.fatal_error(exc)
|
||||
|
||||
def coordinator(self, coord_type):
|
||||
if coord_type == 'group':
|
||||
return self._consumer_group_coordinator
|
||||
elif coord_type == 'transaction':
|
||||
return self._transaction_coordinator
|
||||
else:
|
||||
raise Errors.IllegalStateError("Received an invalid coordinator type: %s" % (coord_type,))
|
||||
|
||||
def lookup_coordinator_for_request(self, request):
|
||||
self._lookup_coordinator(request.coordinator_type, request.coordinator_key)
|
||||
|
||||
def next_in_flight_request_correlation_id(self):
|
||||
self._in_flight_request_correlation_id += 1
|
||||
return self._in_flight_request_correlation_id
|
||||
|
||||
def clear_in_flight_transactional_request_correlation_id(self):
|
||||
self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
def has_in_flight_transactional_request(self):
|
||||
return self._in_flight_request_correlation_id != self.NO_INFLIGHT_REQUEST_CORRELATION_ID
|
||||
|
||||
def has_fatal_error(self):
|
||||
return self._current_state == TransactionState.FATAL_ERROR
|
||||
|
||||
def has_abortable_error(self):
|
||||
return self._current_state == TransactionState.ABORTABLE_ERROR
|
||||
|
||||
# visible for testing
|
||||
def _test_transaction_contains_partition(self, tp):
|
||||
with self._lock:
|
||||
return tp in self._partitions_in_transaction
|
||||
|
||||
# visible for testing
|
||||
def _test_has_pending_offset_commits(self):
|
||||
return bool(self._pending_txn_offset_commits)
|
||||
|
||||
# visible for testing
|
||||
def _test_has_ongoing_transaction(self):
|
||||
with self._lock:
|
||||
# transactions are considered ongoing once started until completion or a fatal error
|
||||
return self._current_state == TransactionState.IN_TRANSACTION or self.is_completing() or self.has_abortable_error()
|
||||
|
||||
# visible for testing
|
||||
def _test_is_ready(self):
|
||||
with self._lock:
|
||||
return self.is_transactional() and self._current_state == TransactionState.READY
|
||||
|
||||
def _transition_to(self, target, error=None):
|
||||
with self._lock:
|
||||
if not self._current_state.is_transition_valid(self._current_state, target):
|
||||
raise Errors.KafkaError("TransactionalId %s: Invalid transition attempted from state %s to state %s" % (
|
||||
self.transactional_id, self._current_state.name, target.name))
|
||||
|
||||
if target in (TransactionState.FATAL_ERROR, TransactionState.ABORTABLE_ERROR):
|
||||
if error is None:
|
||||
raise Errors.IllegalArgumentError("Cannot transition to %s with a None exception" % (target.name,))
|
||||
self._last_error = error
|
||||
else:
|
||||
self._last_error = None
|
||||
|
||||
if self._last_error is not None:
|
||||
log.debug("Transition from state %s to error state %s (%s)", self._current_state.name, target.name, self._last_error)
|
||||
else:
|
||||
log.debug("Transition from state %s to %s", self._current_state, target)
|
||||
self._current_state = target
|
||||
|
||||
def _ensure_transactional(self):
|
||||
if not self.is_transactional():
|
||||
raise Errors.IllegalStateError("Transactional method invoked on a non-transactional producer.")
|
||||
|
||||
def _maybe_fail_with_error(self):
|
||||
if self.has_error():
|
||||
raise Errors.KafkaError("Cannot execute transactional method because we are in an error state: %s" % (self._last_error,))
|
||||
|
||||
def _maybe_terminate_request_with_error(self, request_handler):
|
||||
if self.has_error():
|
||||
if self.has_abortable_error() and isinstance(request_handler, FindCoordinatorHandler):
|
||||
# No harm letting the FindCoordinator request go through if we're expecting to abort
|
||||
return False
|
||||
request_handler.fail(self._last_error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def _next_pending_requests_sort_id(self):
|
||||
self._pending_requests_sort_id += 1
|
||||
return self._pending_requests_sort_id
|
||||
|
||||
def _enqueue_request(self, request_handler):
|
||||
log.debug("Enqueuing transactional request %s", request_handler.request)
|
||||
heapq.heappush(
|
||||
self._pending_requests,
|
||||
(
|
||||
request_handler.priority, # keep lowest priority at head of queue
|
||||
self._next_pending_requests_sort_id(), # break ties
|
||||
request_handler
|
||||
)
|
||||
)
|
||||
|
||||
def _lookup_coordinator(self, coord_type, coord_key):
|
||||
with self._lock:
|
||||
if coord_type == 'group':
|
||||
self._consumer_group_coordinator = None
|
||||
elif coord_type == 'transaction':
|
||||
self._transaction_coordinator = None
|
||||
else:
|
||||
raise Errors.IllegalStateError("Invalid coordinator type: %s" % (coord_type,))
|
||||
self._enqueue_request(FindCoordinatorHandler(self, coord_type, coord_key))
|
||||
|
||||
def _complete_transaction(self):
|
||||
with self._lock:
|
||||
self._transition_to(TransactionState.READY)
|
||||
self._transaction_started = False
|
||||
self._new_partitions_in_transaction.clear()
|
||||
self._pending_partitions_in_transaction.clear()
|
||||
self._partitions_in_transaction.clear()
|
||||
|
||||
def _add_partitions_to_transaction_handler(self):
|
||||
with self._lock:
|
||||
self._pending_partitions_in_transaction.update(self._new_partitions_in_transaction)
|
||||
self._new_partitions_in_transaction.clear()
|
||||
return AddPartitionsToTxnHandler(self, self._pending_partitions_in_transaction)
|
||||
|
||||
|
||||
class TransactionalRequestResult(object):
|
||||
def __init__(self):
|
||||
self._latch = threading.Event()
|
||||
self._error = None
|
||||
|
||||
def done(self, error=None):
|
||||
self._error = error
|
||||
self._latch.set()
|
||||
|
||||
def wait(self, timeout_ms=None):
|
||||
timeout = timeout_ms / 1000.0 if timeout_ms is not None else None
|
||||
success = self._latch.wait(timeout)
|
||||
if self._error:
|
||||
raise self._error
|
||||
return success
|
||||
|
||||
@property
|
||||
def is_done(self):
|
||||
return self._latch.is_set()
|
||||
|
||||
@property
|
||||
def succeeded(self):
|
||||
return self._latch.is_set() and self._error is None
|
||||
|
||||
@property
|
||||
def failed(self):
|
||||
return self._latch.is_set() and self._error is not None
|
||||
|
||||
@property
|
||||
def exception(self):
|
||||
return self._error
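# Illustrative sketch (not from the kafka-python source): the result object is essentially a
# latch plus an optional error, so wait() re-raises on the caller's thread once the handler
# thread calls done(error). A minimal stand-in using only the stdlib:
import threading

latch, box = threading.Event(), {}

def _handler_thread():
    box['error'] = RuntimeError('abortable')   # the handler failed
    latch.set()

threading.Thread(target=_handler_thread).start()
latch.wait(1.0)
assert isinstance(box.get('error'), RuntimeError)   # wait() would re-raise this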
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class TxnRequestHandler(object):
|
||||
def __init__(self, transaction_manager, result=None):
|
||||
self.transaction_manager = transaction_manager
|
||||
self.retry_backoff_ms = transaction_manager.retry_backoff_ms
|
||||
self.request = None
|
||||
self._result = result or TransactionalRequestResult()
|
||||
self._is_retry = False
|
||||
|
||||
@property
|
||||
def transactional_id(self):
|
||||
return self.transaction_manager.transactional_id
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self.transaction_manager.producer_id_and_epoch.producer_id
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self.transaction_manager.producer_id_and_epoch.epoch
|
||||
|
||||
def fatal_error(self, exc):
|
||||
self.transaction_manager.transition_to_fatal_error(exc)
|
||||
self._result.done(error=exc)
|
||||
|
||||
def abortable_error(self, exc):
|
||||
self.transaction_manager.transition_to_abortable_error(exc)
|
||||
self._result.done(error=exc)
|
||||
|
||||
def fail(self, exc):
|
||||
self._result.done(error=exc)
|
||||
|
||||
def reenqueue(self):
|
||||
with self.transaction_manager._lock:
|
||||
self._is_retry = True
|
||||
self.transaction_manager._enqueue_request(self)
|
||||
|
||||
def on_complete(self, correlation_id, response_or_exc):
|
||||
if correlation_id != self.transaction_manager._in_flight_request_correlation_id:
|
||||
self.fatal_error(RuntimeError("Detected more than one in-flight transactional request."))
|
||||
else:
|
||||
self.transaction_manager.clear_in_flight_transactional_request_correlation_id()
|
||||
if isinstance(response_or_exc, Errors.KafkaConnectionError):
|
||||
log.debug("Disconnected from node. Will retry.")
|
||||
if self.needs_coordinator():
|
||||
self.transaction_manager._lookup_coordinator(self.coordinator_type, self.coordinator_key)
|
||||
self.reenqueue()
|
||||
elif isinstance(response_or_exc, Errors.UnsupportedVersionError):
|
||||
self.fatal_error(response_or_exc)
|
||||
elif not isinstance(response_or_exc, (Exception, type(None))):
|
||||
log.debug("Received transactional response %s for request %s", response_or_exc, self.request)
|
||||
with self.transaction_manager._lock:
|
||||
self.handle_response(response_or_exc)
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Could not execute transactional request for unknown reasons: %s" % response_or_exc))
|
||||
|
||||
def needs_coordinator(self):
|
||||
return self.coordinator_type is not None
|
||||
|
||||
@property
|
||||
def result(self):
|
||||
return self._result
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return 'transaction'
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return self.transaction_manager.transactional_id
|
||||
|
||||
def set_retry(self):
|
||||
self._is_retry = True
|
||||
|
||||
@property
|
||||
def is_retry(self):
|
||||
return self._is_retry
|
||||
|
||||
@abc.abstractmethod
|
||||
def handle_response(self, response):
|
||||
pass
|
||||
|
||||
@abc.abstractproperty
|
||||
def priority(self):
|
||||
pass
|
||||
|
||||
|
||||
class InitProducerIdHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, transaction_timeout_ms):
|
||||
super(InitProducerIdHandler, self).__init__(transaction_manager)
|
||||
|
||||
if transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = InitProducerIdRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
transaction_timeout_ms=transaction_timeout_ms)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.INIT_PRODUCER_ID
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
self.transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch))
|
||||
self.transaction_manager._transition_to(TransactionState.READY)
|
||||
self._result.done()
|
||||
elif error in (Errors.NotCoordinatorError, Errors.CoordinatorNotAvailableError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in InitProducerIdResponse: %s" % (error())))
|
||||
|
||||
class AddPartitionsToTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, topic_partitions):
|
||||
super(AddPartitionsToTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
if transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
topic_data = collections.defaultdict(list)
|
||||
for tp in topic_partitions:
|
||||
topic_data[tp.topic].append(tp.partition)
|
||||
self.request = AddPartitionsToTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
topics=list(topic_data.items()))
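# Illustrative sketch (not from the kafka-python source): the flat set of TopicPartition
# tuples is grouped into the per-topic [(topic, [partitions...])] layout that the
# AddPartitionsToTxnRequest schema expects. Topic names and a local namedtuple stand-in for
# TopicPartition are hypothetical.
import collections

TopicPartition = collections.namedtuple('TopicPartition', ['topic', 'partition'])  # stand-in
partitions = {TopicPartition('orders', 0), TopicPartition('orders', 3), TopicPartition('audit', 1)}
topic_data = collections.defaultdict(list)
for tp in partitions:
    topic_data[tp.topic].append(tp.partition)
assert sorted(topic_data['orders']) == [0, 3] and topic_data['audit'] == [1]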
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
def handle_response(self, response):
|
||||
has_partition_errors = False
|
||||
unauthorized_topics = set()
|
||||
self.retry_backoff_ms = self.transaction_manager.retry_backoff_ms
|
||||
|
||||
results = {TopicPartition(topic, partition): Errors.for_code(error_code)
|
||||
for topic, partition_data in response.results
|
||||
for partition, error_code in partition_data}
|
||||
|
||||
for tp, error in six.iteritems(results):
|
||||
if error is Errors.NoError:
|
||||
continue
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error is Errors.ConcurrentTransactionsError:
|
||||
self.maybe_override_retry_backoff_ms()
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.UnknownTopicOrPartitionError):
|
||||
self.reenqueue()
|
||||
return
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
return
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
return
|
||||
elif error in (Errors.InvalidProducerIdMappingError, Errors.InvalidTxnStateError):
|
||||
self.fatal_error(Errors.KafkaError(error()))
|
||||
return
|
||||
elif error is Errors.TopicAuthorizationFailedError:
|
||||
unauthorized_topics.add(tp.topic)
|
||||
elif error is Errors.OperationNotAttemptedError:
|
||||
log.debug("Did not attempt to add partition %s to transaction because other partitions in the"
|
||||
" batch had errors.", tp)
|
||||
has_partition_errors = True
|
||||
else:
|
||||
log.error("Could not add partition %s due to unexpected error %s", tp, error())
|
||||
has_partition_errors = True
|
||||
|
||||
partitions = set(results)
|
||||
|
||||
# Remove the partitions from the pending set regardless of the result. We use the presence
|
||||
# of partitions in the pending set to know when it is not safe to send batches. However, if
|
||||
# the partitions failed to be added and we enter an error state, we expect the batches to be
|
||||
# aborted anyway. In this case, we must be able to continue sending the batches which are in
|
||||
# retry for partitions that were successfully added.
|
||||
self.transaction_manager._pending_partitions_in_transaction -= partitions
|
||||
|
||||
if unauthorized_topics:
|
||||
self.abortable_error(Errors.TopicAuthorizationFailedError(unauthorized_topics))
|
||||
elif has_partition_errors:
|
||||
self.abortable_error(Errors.KafkaError("Could not add partitions to transaction due to errors: %s" % (results)))
|
||||
else:
|
||||
log.debug("Successfully added partitions %s to transaction", partitions)
|
||||
self.transaction_manager._partitions_in_transaction.update(partitions)
|
||||
self.transaction_manager._transaction_started = True
|
||||
self._result.done()
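# Illustrative sketch (not from the kafka-python source): response.results is an array of
# (topic, [(partition, error_code), ...]) pairs per the AddPartitionsToTxnResponse schema, and
# the comprehension above flattens it into a per-partition error mapping. Hypothetical raw
# payload with two clean partitions and one UNKNOWN_TOPIC_OR_PARTITION error (code 3):
raw_results = [('orders', [(0, 0), (3, 0)]), ('audit', [(1, 3)])]
flattened = {(topic, partition): error_code
             for topic, partition_data in raw_results
             for partition, error_code in partition_data}
assert flattened[('audit', 1)] == 3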
|
||||
|
||||
def maybe_override_retry_backoff_ms(self):
|
||||
# We only want to reduce the backoff when retrying the first AddPartition which errored out due to a
|
||||
# CONCURRENT_TRANSACTIONS error since this means that the previous transaction is still completing and
|
||||
# we don't want to wait too long before trying to start the new one.
|
||||
#
|
||||
# This is only a temporary fix, the long term solution is being tracked in
|
||||
# https://issues.apache.org/jira/browse/KAFKA-5482
|
||||
if not self.transaction_manager._partitions_in_transaction:
|
||||
self.retry_backoff_ms = min(self.transaction_manager.ADD_PARTITIONS_RETRY_BACKOFF_MS, self.retry_backoff_ms)
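# Illustrative sketch (not from the kafka-python source): with the default retry_backoff_ms
# of 100, the first CONCURRENT_TRANSACTIONS retry is shortened to
# ADD_PARTITIONS_RETRY_BACKOFF_MS (20ms), while a user-configured backoff already below 20ms
# is kept as-is.
ADD_PARTITIONS_RETRY_BACKOFF_MS = 20
assert min(ADD_PARTITIONS_RETRY_BACKOFF_MS, 100) == 20
assert min(ADD_PARTITIONS_RETRY_BACKOFF_MS, 5) == 5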
|
||||
|
||||
|
||||
class FindCoordinatorHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, coord_type, coord_key):
|
||||
super(FindCoordinatorHandler, self).__init__(transaction_manager)
|
||||
|
||||
self._coord_type = coord_type
|
||||
self._coord_key = coord_key
|
||||
if transaction_manager._api_version >= (2, 0):
|
||||
version = 2
|
||||
else:
|
||||
version = 1
|
||||
if coord_type == 'group':
|
||||
coord_type_int8 = 0
|
||||
elif coord_type == 'transaction':
|
||||
coord_type_int8 = 1
|
||||
else:
|
||||
raise ValueError("Unrecognized coordinator type: %s" % (coord_type,))
|
||||
self.request = FindCoordinatorRequest[version](
|
||||
coordinator_key=coord_key,
|
||||
coordinator_type=coord_type_int8,
|
||||
)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.FIND_COORDINATOR
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return None
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return None
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
coordinator_id = self.transaction_manager._metadata.add_coordinator(
|
||||
response, self._coord_type, self._coord_key)
|
||||
if self._coord_type == 'group':
|
||||
self.transaction_manager._consumer_group_coordinator = coordinator_id
|
||||
elif self._coord_type == 'transaction':
|
||||
self.transaction_manager._transaction_coordinator = coordinator_id
|
||||
self._result.done()
|
||||
elif error is Errors.CoordinatorNotAvailableError:
|
||||
self.reenqueue()
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self._coord_key))
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError(
|
||||
"Could not find a coordinator with type %s with key %s due to"
|
||||
" unexpected error: %s" % (self._coord_type, self._coord_key, error())))
|
||||
|
||||
|
||||
class EndTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, committed):
|
||||
super(EndTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
if self.transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = EndTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
committed=committed)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.END_TXN
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
self.transaction_manager._complete_transaction()
|
||||
self._result.done()
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.InvalidTxnStateError:
|
||||
self.fatal_error(error())
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unhandled error in EndTxnResponse: %s" % (error())))
|
||||
|
||||
|
||||
class AddOffsetsToTxnHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, consumer_group_id, offsets):
|
||||
super(AddOffsetsToTxnHandler, self).__init__(transaction_manager)
|
||||
|
||||
self.consumer_group_id = consumer_group_id
|
||||
self.offsets = offsets
|
||||
if self.transaction_manager._api_version >= (2, 7):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
self.request = AddOffsetsToTxnRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
group_id=consumer_group_id)
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
def handle_response(self, response):
|
||||
error = Errors.for_code(response.error_code)
|
||||
|
||||
if error is Errors.NoError:
|
||||
log.debug("Successfully added partition for consumer group %s to transaction", self.consumer_group_id)
|
||||
|
||||
# note the result is not completed until the TxnOffsetCommit returns
|
||||
for tp, offset in six.iteritems(self.offsets):
|
||||
self.transaction_manager._pending_txn_offset_commits[tp] = offset
|
||||
handler = TxnOffsetCommitHandler(self.transaction_manager, self.consumer_group_id,
|
||||
self.transaction_manager._pending_txn_offset_commits, self._result)
|
||||
self.transaction_manager._enqueue_request(handler)
|
||||
self.transaction_manager._transaction_started = True
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError):
|
||||
self.transaction_manager._lookup_coordinator('transaction', self.transactional_id)
|
||||
self.reenqueue()
|
||||
elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError):
|
||||
self.reenqueue()
|
||||
elif error is Errors.InvalidProducerEpochError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.TransactionalIdAuthorizationFailedError:
|
||||
self.fatal_error(error())
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self.consumer_group_id))
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in AddOffsetsToTxnResponse: %s" % (error())))
|
||||
|
||||
|
||||
class TxnOffsetCommitHandler(TxnRequestHandler):
|
||||
def __init__(self, transaction_manager, consumer_group_id, offsets, result):
|
||||
super(TxnOffsetCommitHandler, self).__init__(transaction_manager, result=result)
|
||||
|
||||
self.consumer_group_id = consumer_group_id
|
||||
self.offsets = offsets
|
||||
self.request = self._build_request()
|
||||
|
||||
def _build_request(self):
|
||||
if self.transaction_manager._api_version >= (2, 1):
|
||||
version = 2
|
||||
elif self.transaction_manager._api_version >= (2, 0):
|
||||
version = 1
|
||||
else:
|
||||
version = 0
|
||||
|
||||
topic_data = collections.defaultdict(list)
|
||||
for tp, offset in six.iteritems(self.offsets):
|
||||
if version >= 2:
|
||||
partition_data = (tp.partition, offset.offset, offset.leader_epoch, offset.metadata)
|
||||
else:
|
||||
partition_data = (tp.partition, offset.offset, offset.metadata)
|
||||
topic_data[tp.topic].append(partition_data)
|
||||
|
||||
return TxnOffsetCommitRequest[version](
|
||||
transactional_id=self.transactional_id,
|
||||
group_id=self.consumer_group_id,
|
||||
producer_id=self.producer_id,
|
||||
producer_epoch=self.producer_epoch,
|
||||
topics=list(topic_data.items()))
|
||||
|
||||
@property
|
||||
def priority(self):
|
||||
return Priority.ADD_PARTITIONS_OR_OFFSETS
|
||||
|
||||
@property
|
||||
def coordinator_type(self):
|
||||
return 'group'
|
||||
|
||||
@property
|
||||
def coordinator_key(self):
|
||||
return self.consumer_group_id
|
||||
|
||||
def handle_response(self, response):
|
||||
lookup_coordinator = False
|
||||
retriable_failure = False
|
||||
|
||||
errors = {TopicPartition(topic, partition): Errors.for_code(error_code)
|
||||
for topic, partition_data in response.topics
|
||||
for partition, error_code in partition_data}
|
||||
|
||||
for tp, error in six.iteritems(errors):
|
||||
if error is Errors.NoError:
|
||||
log.debug("Successfully added offsets for %s from consumer group %s to transaction.",
|
||||
tp, self.consumer_group_id)
|
||||
del self.transaction_manager._pending_txn_offset_commits[tp]
|
||||
elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError, Errors.RequestTimedOutError):
|
||||
retriable_failure = True
|
||||
lookup_coordinator = True
|
||||
elif error is Errors.UnknownTopicOrPartitionError:
|
||||
retriable_failure = True
|
||||
elif error is Errors.GroupAuthorizationFailedError:
|
||||
self.abortable_error(error(self.consumer_group_id))
|
||||
return
|
||||
elif error in (Errors.TransactionalIdAuthorizationFailedError,
|
||||
Errors.InvalidProducerEpochError,
|
||||
Errors.UnsupportedForMessageFormatError):
|
||||
self.fatal_error(error())
|
||||
return
|
||||
else:
|
||||
self.fatal_error(Errors.KafkaError("Unexpected error in TxnOffsetCommitResponse: %s" % (error())))
|
||||
return
|
||||
|
||||
if lookup_coordinator:
|
||||
self.transaction_manager._lookup_coordinator('group', self.consumer_group_id)
|
||||
|
||||
if not retriable_failure:
|
||||
# all attempted partitions were either successful, or there was a fatal failure.
|
||||
# either way, we are not retrying, so complete the request.
|
||||
self.result.done()
|
||||
|
||||
# retry the commits which failed with a retriable error.
|
||||
elif self.transaction_manager._pending_txn_offset_commits:
|
||||
self.offsets = self.transaction_manager._pending_txn_offset_commits
|
||||
self.request = self._build_request()
|
||||
self.reenqueue()
|
||||
@@ -43,7 +43,4 @@ API_KEYS = {
|
||||
40: 'ExpireDelegationToken',
|
||||
41: 'DescribeDelegationToken',
|
||||
42: 'DeleteGroups',
|
||||
45: 'AlterPartitionReassignments',
|
||||
46: 'ListPartitionReassignments',
|
||||
48: 'DescribeClientQuotas',
|
||||
}
|
||||
|
||||
@@ -2,11 +2,10 @@ from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class AbstractType(object):
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def encode(cls, value): # pylint: disable=no-self-argument
|
||||
pass
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class AddOffsetsToTxnResponse_v0(Response):
|
||||
API_KEY = 25
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
)
|
||||
|
||||
|
||||
class AddOffsetsToTxnResponse_v1(Response):
|
||||
API_KEY = 25
|
||||
API_VERSION = 1
|
||||
SCHEMA = AddOffsetsToTxnResponse_v0.SCHEMA
|
||||
|
||||
|
||||
class AddOffsetsToTxnResponse_v2(Response):
|
||||
API_KEY = 25
|
||||
API_VERSION = 2
|
||||
SCHEMA = AddOffsetsToTxnResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class AddOffsetsToTxnRequest_v0(Request):
|
||||
API_KEY = 25
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = AddOffsetsToTxnResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('transactional_id', String('utf-8')),
|
||||
('producer_id', Int64),
|
||||
('producer_epoch', Int16),
|
||||
('group_id', String('utf-8')),
|
||||
)
|
||||
|
||||
|
||||
class AddOffsetsToTxnRequest_v1(Request):
|
||||
API_KEY = 25
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = AddOffsetsToTxnResponse_v1
|
||||
SCHEMA = AddOffsetsToTxnRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class AddOffsetsToTxnRequest_v2(Request):
|
||||
API_KEY = 25
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = AddOffsetsToTxnResponse_v2
|
||||
SCHEMA = AddOffsetsToTxnRequest_v1.SCHEMA
|
||||
|
||||
|
||||
AddOffsetsToTxnRequest = [
|
||||
AddOffsetsToTxnRequest_v0, AddOffsetsToTxnRequest_v1, AddOffsetsToTxnRequest_v2,
|
||||
]
|
||||
AddOffsetsToTxnResponse = [
|
||||
AddOffsetsToTxnResponse_v0, AddOffsetsToTxnResponse_v1, AddOffsetsToTxnResponse_v2,
|
||||
]
|
||||
@@ -1,63 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class AddPartitionsToTxnResponse_v0(Response):
|
||||
API_KEY = 24
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('results', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('error_code', Int16))))))
|
||||
|
||||
|
||||
class AddPartitionsToTxnResponse_v1(Response):
|
||||
API_KEY = 24
|
||||
API_VERSION = 1
|
||||
SCHEMA = AddPartitionsToTxnResponse_v0.SCHEMA
|
||||
|
||||
|
||||
class AddPartitionsToTxnResponse_v2(Response):
|
||||
API_KEY = 24
|
||||
API_VERSION = 2
|
||||
SCHEMA = AddPartitionsToTxnResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class AddPartitionsToTxnRequest_v0(Request):
|
||||
API_KEY = 24
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = AddPartitionsToTxnResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('transactional_id', String('utf-8')),
|
||||
('producer_id', Int64),
|
||||
('producer_epoch', Int16),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(Int32)))))
|
||||
|
||||
|
||||
class AddPartitionsToTxnRequest_v1(Request):
|
||||
API_KEY = 24
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = AddPartitionsToTxnResponse_v1
|
||||
SCHEMA = AddPartitionsToTxnRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class AddPartitionsToTxnRequest_v2(Request):
|
||||
API_KEY = 24
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = AddPartitionsToTxnResponse_v2
|
||||
SCHEMA = AddPartitionsToTxnRequest_v1.SCHEMA
|
||||
|
||||
|
||||
AddPartitionsToTxnRequest = [
|
||||
AddPartitionsToTxnRequest_v0, AddPartitionsToTxnRequest_v1, AddPartitionsToTxnRequest_v2,
|
||||
]
|
||||
AddPartitionsToTxnResponse = [
|
||||
AddPartitionsToTxnResponse_v0, AddPartitionsToTxnResponse_v1, AddPartitionsToTxnResponse_v2,
|
||||
]
|
||||
@@ -1,14 +1,67 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
# enum in stdlib as of py3.4
|
||||
try:
|
||||
from enum import IntEnum # pylint: disable=import-error
|
||||
except ImportError:
|
||||
# vendored backport module
|
||||
from kafka.vendor.enum34 import IntEnum
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String, Float64, CompactString, CompactArray, TaggedFields
|
||||
from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class ApiVersionResponse_v0(Response):
|
||||
API_KEY = 18
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', Array(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16)))
|
||||
)
|
||||
|
||||
|
||||
class ApiVersionResponse_v1(Response):
|
||||
API_KEY = 18
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', Array(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16))),
|
||||
('throttle_time_ms', Int32)
|
||||
)
|
||||
|
||||
|
||||
class ApiVersionResponse_v2(Response):
|
||||
API_KEY = 18
|
||||
API_VERSION = 2
|
||||
SCHEMA = ApiVersionResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionRequest_v0(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = ApiVersionResponse_v0
|
||||
SCHEMA = Schema()
|
||||
|
||||
|
||||
class ApiVersionRequest_v1(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = ApiVersionResponse_v1
|
||||
SCHEMA = ApiVersionRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionRequest_v2(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = ApiVersionResponse_v1
|
||||
SCHEMA = ApiVersionRequest_v0.SCHEMA
|
||||
|
||||
|
||||
ApiVersionRequest = [
|
||||
ApiVersionRequest_v0, ApiVersionRequest_v1, ApiVersionRequest_v2,
|
||||
]
|
||||
ApiVersionResponse = [
|
||||
ApiVersionResponse_v0, ApiVersionResponse_v1, ApiVersionResponse_v2,
|
||||
]
|
||||
|
||||
|
||||
class CreateTopicsResponse_v0(Response):
|
||||
@@ -186,38 +239,6 @@ DeleteTopicsResponse = [
|
||||
]
|
||||
|
||||
|
||||
class DeleteRecordsResponse_v0(Response):
|
||||
API_KEY = 21
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('topics', Array(
|
||||
('name', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition_index', Int32),
|
||||
('low_watermark', Int64),
|
||||
('error_code', Int16))))),
|
||||
)
|
||||
|
||||
|
||||
class DeleteRecordsRequest_v0(Request):
|
||||
API_KEY = 21
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = DeleteRecordsResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('name', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition_index', Int32),
|
||||
('offset', Int64))))),
|
||||
('timeout_ms', Int32)
|
||||
)
|
||||
|
||||
|
||||
DeleteRecordsResponse = [DeleteRecordsResponse_v0]
|
||||
DeleteRecordsRequest = [DeleteRecordsRequest_v0]
|
||||
|
||||
|
||||
class ListGroupsResponse_v0(Response):
|
||||
API_KEY = 16
|
||||
API_VERSION = 0
|
||||
@@ -385,6 +406,41 @@ DescribeGroupsResponse = [
|
||||
]
|
||||
|
||||
|
||||
class SaslHandShakeResponse_v0(Response):
|
||||
API_KEY = 17
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('enabled_mechanisms', Array(String('utf-8')))
|
||||
)
|
||||
|
||||
|
||||
class SaslHandShakeResponse_v1(Response):
|
||||
API_KEY = 17
|
||||
API_VERSION = 1
|
||||
SCHEMA = SaslHandShakeResponse_v0.SCHEMA
|
||||
|
||||
|
||||
class SaslHandShakeRequest_v0(Request):
|
||||
API_KEY = 17
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = SaslHandShakeResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('mechanism', String('utf-8'))
|
||||
)
|
||||
|
||||
|
||||
class SaslHandShakeRequest_v1(Request):
|
||||
API_KEY = 17
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = SaslHandShakeResponse_v1
|
||||
SCHEMA = SaslHandShakeRequest_v0.SCHEMA
|
||||
|
||||
|
||||
SaslHandShakeRequest = [SaslHandShakeRequest_v0, SaslHandShakeRequest_v1]
|
||||
SaslHandShakeResponse = [SaslHandShakeResponse_v0, SaslHandShakeResponse_v1]
|
||||
|
||||
|
||||
class DescribeAclsResponse_v0(Response):
|
||||
API_KEY = 29
|
||||
API_VERSION = 0
|
||||
@@ -467,8 +523,8 @@ class DescribeAclsRequest_v2(Request):
|
||||
SCHEMA = DescribeAclsRequest_v1.SCHEMA
|
||||
|
||||
|
||||
DescribeAclsRequest = [DescribeAclsRequest_v0, DescribeAclsRequest_v1, DescribeAclsRequest_v2]
|
||||
DescribeAclsResponse = [DescribeAclsResponse_v0, DescribeAclsResponse_v1, DescribeAclsResponse_v2]
|
||||
DescribeAclsRequest = [DescribeAclsRequest_v0, DescribeAclsRequest_v1]
|
||||
DescribeAclsResponse = [DescribeAclsResponse_v0, DescribeAclsResponse_v1]
|
||||
|
||||
class CreateAclsResponse_v0(Response):
|
||||
API_KEY = 30
|
||||
@@ -663,7 +719,7 @@ class DescribeConfigsResponse_v1(Response):
|
||||
('config_names', String('utf-8')),
|
||||
('config_value', String('utf-8')),
|
||||
('read_only', Boolean),
|
||||
('config_source', Int8),
|
||||
('is_default', Boolean),
|
||||
('is_sensitive', Boolean),
|
||||
('config_synonyms', Array(
|
||||
('config_name', String('utf-8')),
|
||||
@@ -734,47 +790,6 @@ DescribeConfigsResponse = [
|
||||
]
|
||||
|
||||
|
||||
class DescribeLogDirsResponse_v0(Response):
|
||||
API_KEY = 35
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('log_dirs', Array(
|
||||
('error_code', Int16),
|
||||
('log_dir', String('utf-8')),
|
||||
('topics', Array(
|
||||
('name', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition_index', Int32),
|
||||
('partition_size', Int64),
|
||||
('offset_lag', Int64),
|
||||
('is_future_key', Boolean)
|
||||
))
|
||||
))
|
||||
))
|
||||
)
|
||||
|
||||
|
||||
class DescribeLogDirsRequest_v0(Request):
|
||||
API_KEY = 35
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = DescribeLogDirsResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Int32)
|
||||
))
|
||||
)
|
||||
|
||||
|
||||
DescribeLogDirsResponse = [
|
||||
DescribeLogDirsResponse_v0,
|
||||
]
|
||||
DescribeLogDirsRequest = [
|
||||
DescribeLogDirsRequest_v0,
|
||||
]
|
||||
|
||||
|
||||
class SaslAuthenticateResponse_v0(Response):
|
||||
API_KEY = 36
|
||||
API_VERSION = 0
|
||||
@@ -908,208 +923,3 @@ DeleteGroupsRequest = [
|
||||
DeleteGroupsResponse = [
|
||||
DeleteGroupsResponse_v0, DeleteGroupsResponse_v1
|
||||
]
|
||||
|
||||
|
||||
class DescribeClientQuotasResponse_v0(Response):
|
||||
API_KEY = 48
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
('error_message', String('utf-8')),
|
||||
('entries', Array(
|
||||
('entity', Array(
|
||||
('entity_type', String('utf-8')),
|
||||
('entity_name', String('utf-8')))),
|
||||
('values', Array(
|
||||
('name', String('utf-8')),
|
||||
('value', Float64))))),
|
||||
)
|
||||
|
||||
|
||||
class DescribeClientQuotasRequest_v0(Request):
|
||||
API_KEY = 48
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = DescribeClientQuotasResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('components', Array(
|
||||
('entity_type', String('utf-8')),
|
||||
('match_type', Int8),
|
||||
('match', String('utf-8')),
|
||||
)),
|
||||
('strict', Boolean)
|
||||
)
|
||||
|
||||
|
||||
DescribeClientQuotasRequest = [
|
||||
DescribeClientQuotasRequest_v0,
|
||||
]
|
||||
|
||||
DescribeClientQuotasResponse = [
|
||||
DescribeClientQuotasResponse_v0,
|
||||
]
|
||||
|
||||
|
||||
class AlterPartitionReassignmentsResponse_v0(Response):
|
||||
API_KEY = 45
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
("throttle_time_ms", Int32),
|
||||
("error_code", Int16),
|
||||
("error_message", CompactString("utf-8")),
|
||||
("responses", CompactArray(
|
||||
("name", CompactString("utf-8")),
|
||||
("partitions", CompactArray(
|
||||
("partition_index", Int32),
|
||||
("error_code", Int16),
|
||||
("error_message", CompactString("utf-8")),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)
|
||||
FLEXIBLE_VERSION = True
|
||||
|
||||
|
||||
class AlterPartitionReassignmentsRequest_v0(Request):
|
||||
FLEXIBLE_VERSION = True
|
||||
API_KEY = 45
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = AlterPartitionReassignmentsResponse_v0
|
||||
SCHEMA = Schema(
|
||||
("timeout_ms", Int32),
|
||||
("topics", CompactArray(
|
||||
("name", CompactString("utf-8")),
|
||||
("partitions", CompactArray(
|
||||
("partition_index", Int32),
|
||||
("replicas", CompactArray(Int32)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)
|
||||
|
||||
|
||||
AlterPartitionReassignmentsRequest = [AlterPartitionReassignmentsRequest_v0]
|
||||
|
||||
AlterPartitionReassignmentsResponse = [AlterPartitionReassignmentsResponse_v0]
|
||||
|
||||
|
||||
class ListPartitionReassignmentsResponse_v0(Response):
|
||||
API_KEY = 46
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
("throttle_time_ms", Int32),
|
||||
("error_code", Int16),
|
||||
("error_message", CompactString("utf-8")),
|
||||
("topics", CompactArray(
|
||||
("name", CompactString("utf-8")),
|
||||
("partitions", CompactArray(
|
||||
("partition_index", Int32),
|
||||
("replicas", CompactArray(Int32)),
|
||||
("adding_replicas", CompactArray(Int32)),
|
||||
("removing_replicas", CompactArray(Int32)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)
|
||||
FLEXIBLE_VERSION = True
|
||||
|
||||
|
||||
class ListPartitionReassignmentsRequest_v0(Request):
|
||||
FLEXIBLE_VERSION = True
|
||||
API_KEY = 46
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = ListPartitionReassignmentsResponse_v0
|
||||
SCHEMA = Schema(
|
||||
("timeout_ms", Int32),
|
||||
("topics", CompactArray(
|
||||
("name", CompactString("utf-8")),
|
||||
("partition_index", CompactArray(Int32)),
|
||||
("tags", TaggedFields)
|
||||
)),
|
||||
("tags", TaggedFields)
|
||||
)
|
||||
|
||||
|
||||
ListPartitionReassignmentsRequest = [ListPartitionReassignmentsRequest_v0]
|
||||
|
||||
ListPartitionReassignmentsResponse = [ListPartitionReassignmentsResponse_v0]
|
||||
|
||||
|
||||
class ElectLeadersResponse_v0(Response):
|
||||
API_KEY = 43
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
('replication_election_results', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partition_result', Array(
|
||||
('partition_id', Int32),
|
||||
('error_code', Int16),
|
||||
('error_message', String('utf-8'))
|
||||
))
|
||||
))
|
||||
)
|
||||
|
||||
|
||||
class ElectLeadersRequest_v0(Request):
|
||||
API_KEY = 43
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = ElectLeadersResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('election_type', Int8),
|
||||
('topic_partitions', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partition_ids', Array(Int32))
|
||||
)),
|
||||
('timeout', Int32),
|
||||
)
|
||||
|
||||
|
||||
class ElectLeadersResponse_v1(Response):
|
||||
API_KEY = 43
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
('replication_election_results', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partition_result', Array(
|
||||
('partition_id', Int32),
|
||||
('error_code', Int16),
|
||||
('error_message', String('utf-8'))
|
||||
))
|
||||
))
|
||||
)
|
||||
|
||||
|
||||
class ElectLeadersRequest_v1(Request):
|
||||
API_KEY = 43
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = ElectLeadersResponse_v1
|
||||
SCHEMA = Schema(
|
||||
('election_type', Int8),
|
||||
('topic_partitions', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partition_ids', Array(Int32))
|
||||
)),
|
||||
('timeout', Int32),
|
||||
)
|
||||
|
||||
|
||||
class ElectionType(IntEnum):
|
||||
""" Leader election type
|
||||
"""
|
||||
|
||||
PREFERRED = 0
|
||||
UNCLEAN = 1
|
||||
|
||||
|
||||
ElectLeadersRequest = [ElectLeadersRequest_v0, ElectLeadersRequest_v1]
|
||||
ElectLeadersResponse = [ElectLeadersResponse_v0, ElectLeadersResponse_v1]
|
||||
|
||||
@@ -3,9 +3,7 @@ from __future__ import absolute_import
|
||||
import abc
|
||||
|
||||
from kafka.protocol.struct import Struct
|
||||
from kafka.protocol.types import Int16, Int32, String, Schema, Array, TaggedFields
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
from kafka.protocol.types import Int16, Int32, String, Schema, Array
|
||||
|
||||
|
||||
class RequestHeader(Struct):
|
||||
@@ -22,38 +20,8 @@ class RequestHeader(Struct):
|
||||
)
|
||||
|
||||
|
||||
class RequestHeaderV2(Struct):
|
||||
# Flexible response / request headers end in field buffer
|
||||
SCHEMA = Schema(
|
||||
('api_key', Int16),
|
||||
('api_version', Int16),
|
||||
('correlation_id', Int32),
|
||||
('client_id', String('utf-8')),
|
||||
('tags', TaggedFields),
|
||||
)
|
||||
|
||||
def __init__(self, request, correlation_id=0, client_id='kafka-python', tags=None):
|
||||
super(RequestHeaderV2, self).__init__(
|
||||
request.API_KEY, request.API_VERSION, correlation_id, client_id, tags or {}
|
||||
)
|
||||
|
||||
|
||||
class ResponseHeader(Struct):
|
||||
SCHEMA = Schema(
|
||||
('correlation_id', Int32),
|
||||
)
|
||||
|
||||
|
||||
class ResponseHeaderV2(Struct):
|
||||
SCHEMA = Schema(
|
||||
('correlation_id', Int32),
|
||||
('tags', TaggedFields),
|
||||
)
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class Request(Struct):
|
||||
FLEXIBLE_VERSION = False
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractproperty
|
||||
def API_KEY(self):
|
||||
@@ -82,15 +50,9 @@ class Request(Struct):
|
||||
def to_object(self):
|
||||
return _to_object(self.SCHEMA, self)
|
||||
|
||||
def build_header(self, correlation_id, client_id):
|
||||
if self.FLEXIBLE_VERSION:
|
||||
return RequestHeaderV2(self, correlation_id=correlation_id, client_id=client_id)
|
||||
return RequestHeader(self, correlation_id=correlation_id, client_id=client_id)
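# Illustrative sketch (not from the kafka-python source): requests that set
# FLEXIBLE_VERSION = True get the v2 header with tagged fields appended, while everything
# else keeps the classic (api_key, api_version, correlation_id, client_id) header. The
# stand-in classes below are hypothetical.
class _LegacyRequest(object):      # stand-in; real requests subclass Request above
    FLEXIBLE_VERSION = False

class _FlexibleRequest(object):
    FLEXIBLE_VERSION = True

def _header_class(request):
    return 'RequestHeaderV2' if request.FLEXIBLE_VERSION else 'RequestHeader'

assert _header_class(_LegacyRequest()) == 'RequestHeader'
assert _header_class(_FlexibleRequest()) == 'RequestHeaderV2'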
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class Response(Struct):
|
||||
FLEXIBLE_VERSION = False
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractproperty
|
||||
def API_KEY(self):
|
||||
@@ -110,12 +72,6 @@ class Response(Struct):
|
||||
def to_object(self):
|
||||
return _to_object(self.SCHEMA, self)
|
||||
|
||||
@classmethod
|
||||
def parse_header(cls, read_buffer):
|
||||
if cls.FLEXIBLE_VERSION:
|
||||
return ResponseHeaderV2.decode(read_buffer)
|
||||
return ResponseHeader.decode(read_buffer)
|
||||
|
||||
|
||||
def _to_object(schema, data):
|
||||
obj = {}
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Schema, TaggedFields
|
||||
|
||||
|
||||
class BaseApiVersionsResponse(Response):
|
||||
API_KEY = 18
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', Array(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16)))
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
if isinstance(data, bytes):
|
||||
data = BytesIO(data)
|
||||
# Check error_code, decode as v0 if any error
|
||||
curr = data.tell()
|
||||
err = Int16.decode(data)
|
||||
data.seek(curr)
|
||||
if err != 0:
|
||||
return ApiVersionsResponse_v0.decode(data)
|
||||
return super(BaseApiVersionsResponse, cls).decode(data)
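# Illustrative sketch (not from the kafka-python source): a broker that does not support the
# requested ApiVersions version answers with a v0-shaped body whose leading error_code is
# non-zero, which is why decode() above peeks the first Int16 and falls back to the v0 schema.
import struct
from io import BytesIO

buf = BytesIO(struct.pack('>h', 35))           # 35 = UNSUPPORTED_VERSION error code
peek = struct.unpack('>h', buf.read(2))[0]
buf.seek(0)
assert peek != 0   # would trigger the ApiVersionsResponse_v0.decode(data) fallback above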
|
||||
|
||||
|
||||
class ApiVersionsResponse_v0(Response):
|
||||
API_KEY = 18
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', Array(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16)))
|
||||
)
|
||||
|
||||
|
||||
class ApiVersionsResponse_v1(BaseApiVersionsResponse):
|
||||
API_KEY = 18
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', Array(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16))),
|
||||
('throttle_time_ms', Int32)
|
||||
)
|
||||
|
||||
|
||||
class ApiVersionsResponse_v2(BaseApiVersionsResponse):
|
||||
API_KEY = 18
|
||||
API_VERSION = 2
|
||||
SCHEMA = ApiVersionsResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionsResponse_v3(BaseApiVersionsResponse):
|
||||
API_KEY = 18
|
||||
API_VERSION = 3
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('api_versions', CompactArray(
|
||||
('api_key', Int16),
|
||||
('min_version', Int16),
|
||||
('max_version', Int16),
|
||||
('_tagged_fields', TaggedFields))),
|
||||
('throttle_time_ms', Int32),
|
||||
('_tagged_fields', TaggedFields)
|
||||
)
|
||||
# Note: ApiVersions Response does not send FLEXIBLE_VERSION header!
|
||||
|
||||
|
||||
class ApiVersionsResponse_v4(BaseApiVersionsResponse):
|
||||
API_KEY = 18
|
||||
API_VERSION = 4
|
||||
SCHEMA = ApiVersionsResponse_v3.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionsRequest_v0(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = ApiVersionsResponse_v0
|
||||
SCHEMA = Schema()
|
||||
|
||||
|
||||
class ApiVersionsRequest_v1(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = ApiVersionsResponse_v1
|
||||
SCHEMA = ApiVersionsRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionsRequest_v2(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = ApiVersionsResponse_v2
|
||||
SCHEMA = ApiVersionsRequest_v1.SCHEMA
|
||||
|
||||
|
||||
class ApiVersionsRequest_v3(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = ApiVersionsResponse_v3
|
||||
SCHEMA = Schema(
|
||||
('client_software_name', CompactString('utf-8')),
|
||||
('client_software_version', CompactString('utf-8')),
|
||||
('_tagged_fields', TaggedFields)
|
||||
)
|
||||
FLEXIBLE_VERSION = True
|
||||
|
||||
|
||||
class ApiVersionsRequest_v4(Request):
|
||||
API_KEY = 18
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = ApiVersionsResponse_v4
|
||||
SCHEMA = ApiVersionsRequest_v3.SCHEMA
|
||||
FLEXIBLE_VERSION = True
|
||||
|
||||
|
||||
ApiVersionsRequest = [
|
||||
ApiVersionsRequest_v0, ApiVersionsRequest_v1, ApiVersionsRequest_v2,
|
||||
ApiVersionsRequest_v3, ApiVersionsRequest_v4,
|
||||
]
|
||||
ApiVersionsResponse = [
|
||||
ApiVersionsResponse_v0, ApiVersionsResponse_v1, ApiVersionsResponse_v2,
|
||||
ApiVersionsResponse_v3, ApiVersionsResponse_v4,
|
||||
]
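For reference, a sketch of how a client can collapse the api_versions array of a decoded ApiVersionsResponse into the api_key -> (min, max) shape used by BROKER_API_VERSIONS below; the tuple layout follows the schema above, and the sample values are made up:

def api_version_map(api_versions):
    # api_versions: iterable of (api_key, min_version, max_version) entries,
    # i.e. the decoded 'api_versions' array from an ApiVersionsResponse.
    return {api_key: (min_v, max_v) for api_key, min_v, max_v in api_versions}

# Produce is api_key 0, Fetch is api_key 1.
print(api_version_map([(0, 0, 2), (1, 0, 3)]))  # {0: (0, 2), 1: (0, 3)}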
|
||||
@@ -1,68 +0,0 @@
|
||||
BROKER_API_VERSIONS = {
|
||||
# api_versions responses prior to (0, 10) are synthesized for compatibility
|
||||
(0, 8, 0): {0: (0, 0), 1: (0, 0), 2: (0, 0), 3: (0, 0)},
|
||||
# adds offset commit + fetch
|
||||
(0, 8, 1): {0: (0, 0), 1: (0, 0), 2: (0, 0), 3: (0, 0), 8: (0, 0), 9: (0, 0)},
|
||||
# adds find coordinator
|
||||
(0, 8, 2): {0: (0, 0), 1: (0, 0), 2: (0, 0), 3: (0, 0), 8: (0, 1), 9: (0, 1), 10: (0, 0)},
|
||||
# adds group management (join/sync/leave/heartbeat)
|
||||
(0, 9): {0: (0, 1), 1: (0, 1), 2: (0, 0), 3: (0, 0), 8: (0, 2), 9: (0, 1), 10: (0, 0), 11: (0, 0), 12: (0, 0), 13: (0, 0), 14: (0, 0), 15: (0, 0), 16: (0, 0)},
|
||||
# adds message format v1, sasl, and api versions api
|
||||
(0, 10, 0): {0: (0, 2), 1: (0, 2), 2: (0, 0), 3: (0, 1), 4: (0, 0), 5: (0, 0), 6: (0, 2), 7: (1, 1), 8: (0, 2), 9: (0, 1), 10: (0, 0), 11: (0, 0), 12: (0, 0), 13: (0, 0), 14: (0, 0), 15: (0, 0), 16: (0, 0), 17: (0, 0), 18: (0, 0)},
|
||||
|
||||
# All data below is copied from brokers via api_versions_response (see make servers/*/api_versions)
|
||||
# adds admin apis create/delete topics, and bumps fetch/listoffsets/metadata/joingroup
|
||||
(0, 10, 1): {0: (0, 2), 1: (0, 3), 2: (0, 1), 3: (0, 2), 4: (0, 0), 5: (0, 0), 6: (0, 2), 7: (1, 1), 8: (0, 2), 9: (0, 1), 10: (0, 0), 11: (0, 1), 12: (0, 0), 13: (0, 0), 14: (0, 0), 15: (0, 0), 16: (0, 0), 17: (0, 0), 18: (0, 0), 19: (0, 0), 20: (0, 0)},
|
||||
|
||||
# bumps offsetfetch/create-topics
|
||||
(0, 10, 2): {0: (0, 2), 1: (0, 3), 2: (0, 1), 3: (0, 2), 4: (0, 0), 5: (0, 0), 6: (0, 3), 7: (1, 1), 8: (0, 2), 9: (0, 2), 10: (0, 0), 11: (0, 1), 12: (0, 0), 13: (0, 0), 14: (0, 0), 15: (0, 0), 16: (0, 0), 17: (0, 0), 18: (0, 0), 19: (0, 1), 20: (0, 0)},
|
||||
|
||||
# Adds message format v2, and more admin apis (describe/create/delete acls, describe/alter configs, etc)
|
||||
(0, 11): {0: (0, 3), 1: (0, 5), 2: (0, 2), 3: (0, 4), 4: (0, 0), 5: (0, 0), 6: (0, 3), 7: (1, 1), 8: (0, 3), 9: (0, 3), 10: (0, 1), 11: (0, 2), 12: (0, 1), 13: (0, 1), 14: (0, 1), 15: (0, 1), 16: (0, 1), 17: (0, 0), 18: (0, 1), 19: (0, 2), 20: (0, 1), 21: (0, 0), 22: (0, 0), 23: (0, 0), 24: (0, 0), 25: (0, 0), 26: (0, 0), 27: (0, 0), 28: (0, 0), 29: (0, 0), 30: (0, 0), 31: (0, 0), 32: (0, 0), 33: (0, 0)},
|
||||
|
||||
# Adds Sasl Authenticate, and additional admin apis (describe/alter log dirs, etc)
|
||||
(1, 0): {0: (0, 5), 1: (0, 6), 2: (0, 2), 3: (0, 5), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 3), 9: (0, 3), 10: (0, 1), 11: (0, 2), 12: (0, 1), 13: (0, 1), 14: (0, 1), 15: (0, 1), 16: (0, 1), 17: (0, 1), 18: (0, 1), 19: (0, 2), 20: (0, 1), 21: (0, 0), 22: (0, 0), 23: (0, 0), 24: (0, 0), 25: (0, 0), 26: (0, 0), 27: (0, 0), 28: (0, 0), 29: (0, 0), 30: (0, 0), 31: (0, 0), 32: (0, 0), 33: (0, 0), 34: (0, 0), 35: (0, 0), 36: (0, 0), 37: (0, 0)},
|
||||
|
||||
(1, 1): {0: (0, 5), 1: (0, 7), 2: (0, 2), 3: (0, 5), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 3), 9: (0, 3), 10: (0, 1), 11: (0, 2), 12: (0, 1), 13: (0, 1), 14: (0, 1), 15: (0, 1), 16: (0, 1), 17: (0, 1), 18: (0, 1), 19: (0, 2), 20: (0, 1), 21: (0, 0), 22: (0, 0), 23: (0, 0), 24: (0, 0), 25: (0, 0), 26: (0, 0), 27: (0, 0), 28: (0, 0), 29: (0, 0), 30: (0, 0), 31: (0, 0), 32: (0, 1), 33: (0, 0), 34: (0, 0), 35: (0, 0), 36: (0, 0), 37: (0, 0), 38: (0, 0), 39: (0, 0), 40: (0, 0), 41: (0, 0), 42: (0, 0)},
|
||||
|
||||
(2, 0): {0: (0, 6), 1: (0, 8), 2: (0, 3), 3: (0, 6), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 4), 9: (0, 4), 10: (0, 2), 11: (0, 3), 12: (0, 2), 13: (0, 2), 14: (0, 2), 15: (0, 2), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 2), 21: (0, 1), 22: (0, 1), 23: (0, 1), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 1), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 0), 37: (0, 1), 38: (0, 1), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1)},
|
||||
|
||||
(2, 1): {0: (0, 7), 1: (0, 10), 2: (0, 4), 3: (0, 7), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 6), 9: (0, 5), 10: (0, 2), 11: (0, 3), 12: (0, 2), 13: (0, 2), 14: (0, 2), 15: (0, 2), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 3), 21: (0, 1), 22: (0, 1), 23: (0, 2), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 2), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 0), 37: (0, 1), 38: (0, 1), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1)},
|
||||
|
||||
(2, 2): {0: (0, 7), 1: (0, 10), 2: (0, 5), 3: (0, 7), 4: (0, 2), 5: (0, 1), 6: (0, 5), 7: (0, 2), 8: (0, 6), 9: (0, 5), 10: (0, 2), 11: (0, 4), 12: (0, 2), 13: (0, 2), 14: (0, 2), 15: (0, 2), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 3), 21: (0, 1), 22: (0, 1), 23: (0, 2), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 2), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 1), 37: (0, 1), 38: (0, 1), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1), 43: (0, 0)},
|
||||
|
||||
(2, 3): {0: (0, 7), 1: (0, 11), 2: (0, 5), 3: (0, 8), 4: (0, 2), 5: (0, 1), 6: (0, 5), 7: (0, 2), 8: (0, 7), 9: (0, 5), 10: (0, 2), 11: (0, 5), 12: (0, 3), 13: (0, 2), 14: (0, 3), 15: (0, 3), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 3), 21: (0, 1), 22: (0, 1), 23: (0, 3), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 2), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 1), 37: (0, 1), 38: (0, 1), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1), 43: (0, 0), 44: (0, 0)},
|
||||
|
||||
(2, 4): {0: (0, 8), 1: (0, 11), 2: (0, 5), 3: (0, 9), 4: (0, 4), 5: (0, 2), 6: (0, 6), 7: (0, 3), 8: (0, 8), 9: (0, 6), 10: (0, 3), 11: (0, 6), 12: (0, 4), 13: (0, 4), 14: (0, 4), 15: (0, 5), 16: (0, 3), 17: (0, 1), 18: (0, 3), 19: (0, 5), 20: (0, 4), 21: (0, 1), 22: (0, 2), 23: (0, 3), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 2), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 1), 37: (0, 1), 38: (0, 2), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0)},
|
||||
|
||||
(2, 5): {0: (0, 8), 1: (0, 11), 2: (0, 5), 3: (0, 9), 4: (0, 4), 5: (0, 2), 6: (0, 6), 7: (0, 3), 8: (0, 8), 9: (0, 7), 10: (0, 3), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 3), 17: (0, 1), 18: (0, 3), 19: (0, 5), 20: (0, 4), 21: (0, 1), 22: (0, 3), 23: (0, 3), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 2), 37: (0, 2), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0)},
|
||||
|
||||
(2, 6): {0: (0, 8), 1: (0, 11), 2: (0, 5), 3: (0, 9), 4: (0, 4), 5: (0, 3), 6: (0, 6), 7: (0, 3), 8: (0, 8), 9: (0, 7), 10: (0, 3), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 5), 20: (0, 4), 21: (0, 2), 22: (0, 3), 23: (0, 3), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 3), 33: (0, 1), 34: (0, 1), 35: (0, 2), 36: (0, 2), 37: (0, 2), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 0), 49: (0, 0)},
|
||||
|
||||
(2, 7): {0: (0, 8), 1: (0, 12), 2: (0, 5), 3: (0, 9), 4: (0, 4), 5: (0, 3), 6: (0, 6), 7: (0, 3), 8: (0, 8), 9: (0, 7), 10: (0, 3), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 6), 20: (0, 5), 21: (0, 2), 22: (0, 4), 23: (0, 3), 24: (0, 2), 25: (0, 2), 26: (0, 2), 27: (0, 0), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 3), 33: (0, 1), 34: (0, 1), 35: (0, 2), 36: (0, 2), 37: (0, 3), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 0), 49: (0, 0), 50: (0, 0), 51: (0, 0), 56: (0, 0), 57: (0, 0)},
|
||||
|
||||
(2, 8): {0: (0, 9), 1: (0, 12), 2: (0, 6), 3: (0, 11), 4: (0, 5), 5: (0, 3), 6: (0, 7), 7: (0, 3), 8: (0, 8), 9: (0, 7), 10: (0, 3), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 2), 36: (0, 2), 37: (0, 3), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 0), 57: (0, 0), 60: (0, 0), 61: (0, 0)},
|
||||
|
||||
(3, 0): {0: (0, 9), 1: (0, 12), 2: (0, 7), 3: (0, 11), 4: (0, 5), 5: (0, 3), 6: (0, 7), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 2), 36: (0, 2), 37: (0, 3), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 0), 57: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 1): {0: (0, 9), 1: (0, 13), 2: (0, 7), 3: (0, 12), 4: (0, 5), 5: (0, 3), 6: (0, 7), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 7), 12: (0, 4), 13: (0, 4), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 2), 36: (0, 2), 37: (0, 3), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 0), 57: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 2): {0: (0, 9), 1: (0, 13), 2: (0, 7), 3: (0, 12), 4: (0, 6), 5: (0, 3), 6: (0, 7), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 2), 30: (0, 2), 31: (0, 2), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 3), 36: (0, 2), 37: (0, 3), 38: (0, 2), 39: (0, 2), 40: (0, 2), 41: (0, 2), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 1), 57: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 3): {0: (0, 9), 1: (0, 13), 2: (0, 7), 3: (0, 12), 4: (0, 6), 5: (0, 3), 6: (0, 7), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 2), 57: (0, 1), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 4): {0: (0, 9), 1: (0, 13), 2: (0, 7), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 2), 57: (0, 1), 58: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 5): {0: (0, 9), 1: (0, 15), 2: (0, 8), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 3), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 6): {0: (0, 9), 1: (0, 15), 2: (0, 8), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 8), 9: (0, 8), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 4), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 0), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0)},
|
||||
|
||||
(3, 7): {0: (0, 10), 1: (0, 16), 2: (0, 8), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 9), 9: (0, 9), 10: (0, 4), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 4), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 4), 23: (0, 4), 24: (0, 4), 25: (0, 3), 26: (0, 3), 27: (0, 1), 28: (0, 3), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 1), 61: (0, 0), 65: (0, 0), 66: (0, 0), 67: (0, 0), 68: (0, 0)},
|
||||
|
||||
(3, 8): {0: (0, 11), 1: (0, 16), 2: (0, 8), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 9), 9: (0, 9), 10: (0, 5), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 5), 17: (0, 1), 18: (0, 3), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 5), 23: (0, 4), 24: (0, 5), 25: (0, 4), 26: (0, 4), 27: (0, 1), 28: (0, 4), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 1), 61: (0, 0), 65: (0, 0), 66: (0, 1), 67: (0, 0), 68: (0, 0), 69: (0, 0)},
|
||||
|
||||
(3, 9): {0: (0, 11), 1: (0, 17), 2: (0, 9), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 9), 9: (0, 9), 10: (0, 6), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 5), 23: (0, 4), 24: (0, 5), 25: (0, 4), 26: (0, 4), 27: (0, 1), 28: (0, 4), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 1), 61: (0, 0), 65: (0, 0), 66: (0, 1), 67: (0, 0), 68: (0, 0), 69: (0, 0)},
|
||||
|
||||
(4, 0): {0: (0, 12), 1: (4, 17), 2: (1, 10), 3: (0, 13), 8: (2, 9), 9: (1, 9), 10: (0, 6), 11: (2, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 6), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (2, 7), 20: (1, 6), 21: (0, 2), 22: (0, 5), 23: (2, 4), 24: (0, 5), 25: (0, 4), 26: (0, 5), 27: (1, 1), 28: (0, 5), 29: (1, 3), 30: (1, 3), 31: (1, 3), 32: (1, 4), 33: (0, 2), 34: (1, 2), 35: (1, 4), 36: (0, 2), 37: (0, 3), 38: (1, 3), 39: (1, 2), 40: (1, 2), 41: (1, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 55: (0, 2), 57: (0, 2), 60: (0, 2), 61: (0, 0), 64: (0, 0), 65: (0, 0), 66: (0, 1), 68: (0, 1), 69: (0, 1), 74: (0, 0), 75: (0, 0), 80: (0, 0), 81: (0, 0)},
|
||||
|
||||
}
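A hedged sketch of the negotiation this table enables: for a given API key, the usable version is the highest one both sides support, or None when the ranges do not overlap or the broker does not know the API at all.

def negotiate_version(broker_versions, api_key, client_min, client_max):
    # broker_versions: a dict shaped like the BROKER_API_VERSIONS entries above,
    # mapping api_key -> (min_version, max_version).
    if api_key not in broker_versions:
        return None
    broker_min, broker_max = broker_versions[api_key]
    best = min(broker_max, client_max)
    if best < max(broker_min, client_min):
        return None
    return best

# Against the (0, 10, 0) row above, Fetch (api_key 1) is limited to version 2.
print(negotiate_version({1: (0, 2)}, 1, client_min=0, client_max=4))  # 2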
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String
|
||||
from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class OffsetCommitResponse_v0(Response):
|
||||
@@ -41,24 +41,6 @@ class OffsetCommitResponse_v3(Response):
|
||||
)
|
||||
|
||||
|
||||
class OffsetCommitResponse_v4(Response):
|
||||
API_KEY = 8
|
||||
API_VERSION = 4
|
||||
SCHEMA = OffsetCommitResponse_v3.SCHEMA
|
||||
|
||||
|
||||
class OffsetCommitResponse_v5(Response):
|
||||
API_KEY = 8
|
||||
API_VERSION = 5
|
||||
SCHEMA = OffsetCommitResponse_v4.SCHEMA
|
||||
|
||||
|
||||
class OffsetCommitResponse_v6(Response):
|
||||
API_KEY = 8
|
||||
API_VERSION = 6
|
||||
SCHEMA = OffsetCommitResponse_v5.SCHEMA
|
||||
|
||||
|
||||
class OffsetCommitRequest_v0(Request):
|
||||
API_KEY = 8
|
||||
API_VERSION = 0 # Zookeeper-backed storage
|
||||
@@ -94,13 +76,13 @@ class OffsetCommitRequest_v1(Request):
|
||||
|
||||
class OffsetCommitRequest_v2(Request):
|
||||
API_KEY = 8
|
||||
API_VERSION = 2
|
||||
API_VERSION = 2 # added retention_time, dropped timestamp
|
||||
RESPONSE_TYPE = OffsetCommitResponse_v2
|
||||
SCHEMA = Schema(
|
||||
('consumer_group', String('utf-8')),
|
||||
('consumer_group_generation_id', Int32),
|
||||
('consumer_id', String('utf-8')),
|
||||
('retention_time', Int64), # added retention_time, dropped timestamp
|
||||
('retention_time', Int64),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
@@ -108,6 +90,7 @@ class OffsetCommitRequest_v2(Request):
|
||||
('offset', Int64),
|
||||
('metadata', String('utf-8'))))))
|
||||
)
|
||||
DEFAULT_GENERATION_ID = -1
|
||||
DEFAULT_RETENTION_TIME = -1
|
||||
|
||||
|
||||
@@ -116,63 +99,15 @@ class OffsetCommitRequest_v3(Request):
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = OffsetCommitResponse_v3
|
||||
SCHEMA = OffsetCommitRequest_v2.SCHEMA
|
||||
DEFAULT_RETENTION_TIME = -1
|
||||
|
||||
|
||||
class OffsetCommitRequest_v4(Request):
|
||||
API_KEY = 8
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = OffsetCommitResponse_v4
|
||||
SCHEMA = OffsetCommitRequest_v3.SCHEMA
|
||||
DEFAULT_RETENTION_TIME = -1
|
||||
|
||||
|
||||
class OffsetCommitRequest_v5(Request):
|
||||
API_KEY = 8
|
||||
API_VERSION = 5 # drops retention_time
|
||||
RESPONSE_TYPE = OffsetCommitResponse_v5
|
||||
SCHEMA = Schema(
|
||||
('consumer_group', String('utf-8')),
|
||||
('consumer_group_generation_id', Int32),
|
||||
('consumer_id', String('utf-8')),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('offset', Int64),
|
||||
('metadata', String('utf-8'))))))
|
||||
)
|
||||
|
||||
|
||||
class OffsetCommitRequest_v6(Request):
|
||||
API_KEY = 8
|
||||
API_VERSION = 6
|
||||
RESPONSE_TYPE = OffsetCommitResponse_v6
|
||||
SCHEMA = Schema(
|
||||
('consumer_group', String('utf-8')),
|
||||
('consumer_group_generation_id', Int32),
|
||||
('consumer_id', String('utf-8')),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('offset', Int64),
|
||||
('leader_epoch', Int32), # added for fencing / kip-320. default -1
|
||||
('metadata', String('utf-8'))))))
|
||||
)
|
||||
|
||||
|
||||
OffsetCommitRequest = [
|
||||
OffsetCommitRequest_v0, OffsetCommitRequest_v1,
|
||||
OffsetCommitRequest_v2, OffsetCommitRequest_v3,
|
||||
OffsetCommitRequest_v4, OffsetCommitRequest_v5,
|
||||
OffsetCommitRequest_v6,
|
||||
OffsetCommitRequest_v2, OffsetCommitRequest_v3
|
||||
]
|
||||
OffsetCommitResponse = [
|
||||
OffsetCommitResponse_v0, OffsetCommitResponse_v1,
|
||||
OffsetCommitResponse_v2, OffsetCommitResponse_v3,
|
||||
OffsetCommitResponse_v4, OffsetCommitResponse_v5,
|
||||
OffsetCommitResponse_v6,
|
||||
OffsetCommitResponse_v2, OffsetCommitResponse_v3
|
||||
]
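The version bumps above are easiest to follow in the payload shape: v2 adds retention_time (and drops the per-partition timestamp), v5 drops retention_time again, and v6 adds a per-partition leader_epoch. A sketch of the nested structure a v2 commit carries, written as plain Python data rather than the library's Struct classes; the group, topic and offsets are made-up examples:

offset_commit_v2 = (
    'example-group',        # consumer_group
    -1,                     # consumer_group_generation_id (DEFAULT_GENERATION_ID)
    'example-consumer',     # consumer_id
    -1,                     # retention_time (DEFAULT_RETENTION_TIME: use broker default)
    [                       # topics
        ('example-topic', [
            (0, 42, ''),    # (partition, offset, metadata)
            (1, 17, ''),
        ]),
    ],
)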
|
||||
|
||||
|
||||
@@ -228,29 +163,6 @@ class OffsetFetchResponse_v3(Response):
|
||||
)
|
||||
|
||||
|
||||
class OffsetFetchResponse_v4(Response):
|
||||
API_KEY = 9
|
||||
API_VERSION = 4
|
||||
SCHEMA = OffsetFetchResponse_v3.SCHEMA
|
||||
|
||||
|
||||
class OffsetFetchResponse_v5(Response):
|
||||
API_KEY = 9
|
||||
API_VERSION = 5
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('offset', Int64),
|
||||
('leader_epoch', Int32),
|
||||
('metadata', String('utf-8')),
|
||||
('error_code', Int16))))),
|
||||
('error_code', Int16)
|
||||
)
|
||||
|
||||
|
||||
class OffsetFetchRequest_v0(Request):
|
||||
API_KEY = 9
|
||||
API_VERSION = 0 # zookeeper-backed storage
|
||||
@@ -287,27 +199,57 @@ class OffsetFetchRequest_v3(Request):
|
||||
SCHEMA = OffsetFetchRequest_v2.SCHEMA
|
||||
|
||||
|
||||
class OffsetFetchRequest_v4(Request):
|
||||
API_KEY = 9
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = OffsetFetchResponse_v4
|
||||
SCHEMA = OffsetFetchRequest_v3.SCHEMA
|
||||
|
||||
|
||||
class OffsetFetchRequest_v5(Request):
|
||||
API_KEY = 9
|
||||
API_VERSION = 5
|
||||
RESPONSE_TYPE = OffsetFetchResponse_v5
|
||||
SCHEMA = OffsetFetchRequest_v4.SCHEMA
|
||||
|
||||
|
||||
OffsetFetchRequest = [
|
||||
OffsetFetchRequest_v0, OffsetFetchRequest_v1,
|
||||
OffsetFetchRequest_v2, OffsetFetchRequest_v3,
|
||||
OffsetFetchRequest_v4, OffsetFetchRequest_v5,
|
||||
]
|
||||
OffsetFetchResponse = [
|
||||
OffsetFetchResponse_v0, OffsetFetchResponse_v1,
|
||||
OffsetFetchResponse_v2, OffsetFetchResponse_v3,
|
||||
OffsetFetchResponse_v4, OffsetFetchResponse_v5,
|
||||
]
|
||||
|
||||
|
||||
class GroupCoordinatorResponse_v0(Response):
|
||||
API_KEY = 10
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('coordinator_id', Int32),
|
||||
('host', String('utf-8')),
|
||||
('port', Int32)
|
||||
)
|
||||
|
||||
|
||||
class GroupCoordinatorResponse_v1(Response):
|
||||
API_KEY = 10
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('error_message', String('utf-8')),
|
||||
('coordinator_id', Int32),
|
||||
('host', String('utf-8')),
|
||||
('port', Int32)
|
||||
)
|
||||
|
||||
|
||||
class GroupCoordinatorRequest_v0(Request):
|
||||
API_KEY = 10
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = GroupCoordinatorResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('consumer_group', String('utf-8'))
|
||||
)
|
||||
|
||||
|
||||
class GroupCoordinatorRequest_v1(Request):
|
||||
API_KEY = 10
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = GroupCoordinatorResponse_v1
|
||||
SCHEMA = Schema(
|
||||
('coordinator_key', String('utf-8')),
|
||||
('coordinator_type', Int8)
|
||||
)
|
||||
|
||||
|
||||
GroupCoordinatorRequest = [GroupCoordinatorRequest_v0, GroupCoordinatorRequest_v1]
|
||||
GroupCoordinatorResponse = [GroupCoordinatorResponse_v0, GroupCoordinatorResponse_v1]
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Boolean, Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class EndTxnResponse_v0(Response):
|
||||
API_KEY = 26
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
)
|
||||
|
||||
|
||||
class EndTxnResponse_v1(Response):
|
||||
API_KEY = 26
|
||||
API_VERSION = 1
|
||||
SCHEMA = EndTxnResponse_v0.SCHEMA
|
||||
|
||||
|
||||
class EndTxnResponse_v2(Response):
|
||||
API_KEY = 26
|
||||
API_VERSION = 2
|
||||
SCHEMA = EndTxnResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class EndTxnRequest_v0(Request):
|
||||
API_KEY = 26
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = EndTxnResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('transactional_id', String('utf-8')),
|
||||
('producer_id', Int64),
|
||||
('producer_epoch', Int16),
|
||||
('committed', Boolean))
|
||||
|
||||
|
||||
class EndTxnRequest_v1(Request):
|
||||
API_KEY = 26
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = EndTxnResponse_v1
|
||||
SCHEMA = EndTxnRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class EndTxnRequest_v2(Request):
|
||||
API_KEY = 26
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = EndTxnResponse_v2
|
||||
SCHEMA = EndTxnRequest_v1.SCHEMA
|
||||
|
||||
|
||||
EndTxnRequest = [
|
||||
EndTxnRequest_v0, EndTxnRequest_v1, EndTxnRequest_v2,
|
||||
]
|
||||
EndTxnResponse = [
|
||||
EndTxnResponse_v0, EndTxnResponse_v1, EndTxnResponse_v2,
|
||||
]
|
||||
@@ -1,15 +1,9 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String, Bytes
|
||||
|
||||
|
||||
AbortedTransaction = collections.namedtuple("AbortedTransaction",
|
||||
["producer_id", "first_offset"])
|
||||
|
||||
|
||||
class FetchResponse_v0(Response):
|
||||
API_KEY = 1
|
||||
API_VERSION = 0
|
||||
@@ -20,7 +14,7 @@ class FetchResponse_v0(Response):
|
||||
('partition', Int32),
|
||||
('error_code', Int16),
|
||||
('highwater_offset', Int64),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +29,7 @@ class FetchResponse_v1(Response):
|
||||
('partition', Int32),
|
||||
('error_code', Int16),
|
||||
('highwater_offset', Int64),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -52,7 +46,6 @@ class FetchResponse_v3(Response):
|
||||
|
||||
|
||||
class FetchResponse_v4(Response):
|
||||
# Adds message format v2
|
||||
API_KEY = 1
|
||||
API_VERSION = 4
|
||||
SCHEMA = Schema(
|
||||
@@ -67,7 +60,7 @@ class FetchResponse_v4(Response):
|
||||
('aborted_transactions', Array(
|
||||
('producer_id', Int64),
|
||||
('first_offset', Int64))),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -87,7 +80,7 @@ class FetchResponse_v5(Response):
|
||||
('aborted_transactions', Array(
|
||||
('producer_id', Int64),
|
||||
('first_offset', Int64))),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -122,7 +115,7 @@ class FetchResponse_v7(Response):
|
||||
('aborted_transactions', Array(
|
||||
('producer_id', Int64),
|
||||
('first_offset', Int64))),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -163,7 +156,7 @@ class FetchResponse_v11(Response):
|
||||
('producer_id', Int64),
|
||||
('first_offset', Int64))),
|
||||
('preferred_read_replica', Int32),
|
||||
('records', Bytes)))))
|
||||
('message_set', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -218,7 +211,6 @@ class FetchRequest_v3(Request):
|
||||
|
||||
class FetchRequest_v4(Request):
|
||||
# Adds isolation_level field
|
||||
# Adds message format v2
|
||||
API_KEY = 1
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = FetchResponse_v4
|
||||
@@ -272,7 +264,7 @@ class FetchRequest_v6(Request):
|
||||
|
||||
class FetchRequest_v7(Request):
|
||||
"""
|
||||
Add incremental fetch requests (see KIP-227)
|
||||
Add incremental fetch requests
|
||||
"""
|
||||
API_KEY = 1
|
||||
API_VERSION = 7
|
||||
@@ -293,7 +285,7 @@ class FetchRequest_v7(Request):
|
||||
('log_start_offset', Int64),
|
||||
('max_bytes', Int32))))),
|
||||
('forgotten_topics_data', Array(
|
||||
('topic', String('utf-8')),
|
||||
('topic', String),
|
||||
('partitions', Array(Int32))
|
||||
)),
|
||||
)
|
||||
@@ -333,7 +325,7 @@ class FetchRequest_v9(Request):
|
||||
('log_start_offset', Int64),
|
||||
('max_bytes', Int32))))),
|
||||
('forgotten_topics_data', Array(
|
||||
('topic', String('utf-8')),
|
||||
('topic', String),
|
||||
('partitions', Array(Int32)),
|
||||
)),
|
||||
)
|
||||
@@ -373,7 +365,7 @@ class FetchRequest_v11(Request):
|
||||
('log_start_offset', Int64),
|
||||
('max_bytes', Int32))))),
|
||||
('forgotten_topics_data', Array(
|
||||
('topic', String('utf-8')),
|
||||
('topic', String),
|
||||
('partitions', Array(Int32))
|
||||
)),
|
||||
('rack_id', String('utf-8')),
|
||||
|
||||
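The AbortedTransaction namedtuple imported at the top of this module pairs with the aborted_transactions arrays carried by FetchResponse v4 and later. A short sketch of mapping the raw (producer_id, first_offset) pairs onto it; the sample values are made up:

import collections

# Same shape as the namedtuple defined in this module.
AbortedTransaction = collections.namedtuple(
    'AbortedTransaction', ['producer_id', 'first_offset'])

def aborted_from_partition(aborted_transactions):
    # aborted_transactions is the decoded array for one partition, or None
    # when the broker has nothing to report.
    if not aborted_transactions:
        return []
    return [AbortedTransaction(pid, offset) for pid, offset in aborted_transactions]

print(aborted_from_partition([(1001, 5), (1002, 9)]))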
@@ -1,64 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Int8, Int16, Int32, Schema, String
|
||||
|
||||
|
||||
class FindCoordinatorResponse_v0(Response):
|
||||
API_KEY = 10
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('error_code', Int16),
|
||||
('coordinator_id', Int32),
|
||||
('host', String('utf-8')),
|
||||
('port', Int32)
|
||||
)
|
||||
|
||||
|
||||
class FindCoordinatorResponse_v1(Response):
|
||||
API_KEY = 10
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
('error_message', String('utf-8')),
|
||||
('coordinator_id', Int32),
|
||||
('host', String('utf-8')),
|
||||
('port', Int32)
|
||||
)
|
||||
|
||||
|
||||
class FindCoordinatorResponse_v2(Response):
|
||||
API_KEY = 10
|
||||
API_VERSION = 2
|
||||
SCHEMA = FindCoordinatorResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class FindCoordinatorRequest_v0(Request):
|
||||
API_KEY = 10
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = FindCoordinatorResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('consumer_group', String('utf-8'))
|
||||
)
|
||||
|
||||
|
||||
class FindCoordinatorRequest_v1(Request):
|
||||
API_KEY = 10
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = FindCoordinatorResponse_v1
|
||||
SCHEMA = Schema(
|
||||
('coordinator_key', String('utf-8')),
|
||||
('coordinator_type', Int8) # 0: consumer, 1: transaction
|
||||
)
|
||||
|
||||
|
||||
class FindCoordinatorRequest_v2(Request):
|
||||
API_KEY = 10
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = FindCoordinatorResponse_v2
|
||||
SCHEMA = FindCoordinatorRequest_v1.SCHEMA
|
||||
|
||||
|
||||
FindCoordinatorRequest = [FindCoordinatorRequest_v0, FindCoordinatorRequest_v1, FindCoordinatorRequest_v2]
|
||||
FindCoordinatorResponse = [FindCoordinatorResponse_v0, FindCoordinatorResponse_v1, FindCoordinatorResponse_v2]
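The coordinator_type field is what separates a group-coordinator lookup from a transaction-coordinator lookup in FindCoordinator v1 and later; v0 only knows about consumer groups. A small sketch of the pair of values a v1+ request carries, using the 0/1 codes from the comment above:

GROUP_COORDINATOR, TRANSACTION_COORDINATOR = 0, 1

def coordinator_lookup_fields(key, transactional=False):
    # Returns (coordinator_key, coordinator_type) as in FindCoordinatorRequest_v1.
    return (key, TRANSACTION_COORDINATOR if transactional else GROUP_COORDINATOR)

print(coordinator_lookup_fields('example-group'))         # ('example-group', 0)
print(coordinator_lookup_fields('example-txn-id', True))  # ('example-txn-id', 1)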
|
||||
@@ -5,10 +5,6 @@ from kafka.protocol.struct import Struct
|
||||
from kafka.protocol.types import Array, Bytes, Int16, Int32, Schema, String
|
||||
|
||||
|
||||
DEFAULT_GENERATION_ID = -1
|
||||
UNKNOWN_MEMBER_ID = ''
|
||||
|
||||
|
||||
class JoinGroupResponse_v0(Response):
|
||||
API_KEY = 11
|
||||
API_VERSION = 0
|
||||
@@ -46,18 +42,6 @@ class JoinGroupResponse_v2(Response):
|
||||
)
|
||||
|
||||
|
||||
class JoinGroupResponse_v3(Response):
|
||||
API_KEY = 11
|
||||
API_VERSION = 3
|
||||
SCHEMA = JoinGroupResponse_v2.SCHEMA
|
||||
|
||||
|
||||
class JoinGroupResponse_v4(Response):
|
||||
API_KEY = 11
|
||||
API_VERSION = 4
|
||||
SCHEMA = JoinGroupResponse_v3.SCHEMA
|
||||
|
||||
|
||||
class JoinGroupRequest_v0(Request):
|
||||
API_KEY = 11
|
||||
API_VERSION = 0
|
||||
@@ -71,6 +55,7 @@ class JoinGroupRequest_v0(Request):
|
||||
('protocol_name', String('utf-8')),
|
||||
('protocol_metadata', Bytes)))
|
||||
)
|
||||
UNKNOWN_MEMBER_ID = ''
|
||||
|
||||
|
||||
class JoinGroupRequest_v1(Request):
|
||||
@@ -87,6 +72,7 @@ class JoinGroupRequest_v1(Request):
|
||||
('protocol_name', String('utf-8')),
|
||||
('protocol_metadata', Bytes)))
|
||||
)
|
||||
UNKNOWN_MEMBER_ID = ''
|
||||
|
||||
|
||||
class JoinGroupRequest_v2(Request):
|
||||
@@ -94,29 +80,14 @@ class JoinGroupRequest_v2(Request):
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = JoinGroupResponse_v2
|
||||
SCHEMA = JoinGroupRequest_v1.SCHEMA
|
||||
|
||||
|
||||
class JoinGroupRequest_v3(Request):
|
||||
API_KEY = 11
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = JoinGroupResponse_v3
|
||||
SCHEMA = JoinGroupRequest_v2.SCHEMA
|
||||
|
||||
|
||||
class JoinGroupRequest_v4(Request):
|
||||
API_KEY = 11
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = JoinGroupResponse_v4
|
||||
SCHEMA = JoinGroupRequest_v3.SCHEMA
|
||||
UNKNOWN_MEMBER_ID = ''
|
||||
|
||||
|
||||
JoinGroupRequest = [
|
||||
JoinGroupRequest_v0, JoinGroupRequest_v1, JoinGroupRequest_v2,
|
||||
JoinGroupRequest_v3, JoinGroupRequest_v4,
|
||||
JoinGroupRequest_v0, JoinGroupRequest_v1, JoinGroupRequest_v2
|
||||
]
|
||||
JoinGroupResponse = [
|
||||
JoinGroupResponse_v0, JoinGroupResponse_v1, JoinGroupResponse_v2,
|
||||
JoinGroupResponse_v3, JoinGroupResponse_v4,
|
||||
JoinGroupResponse_v0, JoinGroupResponse_v1, JoinGroupResponse_v2
|
||||
]
|
||||
|
||||
|
||||
@@ -147,12 +118,6 @@ class SyncGroupResponse_v1(Response):
|
||||
)
|
||||
|
||||
|
||||
class SyncGroupResponse_v2(Response):
|
||||
API_KEY = 14
|
||||
API_VERSION = 2
|
||||
SCHEMA = SyncGroupResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class SyncGroupRequest_v0(Request):
|
||||
API_KEY = 14
|
||||
API_VERSION = 0
|
||||
@@ -174,15 +139,8 @@ class SyncGroupRequest_v1(Request):
|
||||
SCHEMA = SyncGroupRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class SyncGroupRequest_v2(Request):
|
||||
API_KEY = 14
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = SyncGroupResponse_v2
|
||||
SCHEMA = SyncGroupRequest_v1.SCHEMA
|
||||
|
||||
|
||||
SyncGroupRequest = [SyncGroupRequest_v0, SyncGroupRequest_v1, SyncGroupRequest_v2]
|
||||
SyncGroupResponse = [SyncGroupResponse_v0, SyncGroupResponse_v1, SyncGroupResponse_v2]
|
||||
SyncGroupRequest = [SyncGroupRequest_v0, SyncGroupRequest_v1]
|
||||
SyncGroupResponse = [SyncGroupResponse_v0, SyncGroupResponse_v1]
|
||||
|
||||
|
||||
class MemberAssignment(Struct):
|
||||
@@ -212,12 +170,6 @@ class HeartbeatResponse_v1(Response):
|
||||
)
|
||||
|
||||
|
||||
class HeartbeatResponse_v2(Response):
|
||||
API_KEY = 12
|
||||
API_VERSION = 2
|
||||
SCHEMA = HeartbeatResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class HeartbeatRequest_v0(Request):
|
||||
API_KEY = 12
|
||||
API_VERSION = 0
|
||||
@@ -236,15 +188,8 @@ class HeartbeatRequest_v1(Request):
|
||||
SCHEMA = HeartbeatRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class HeartbeatRequest_v2(Request):
|
||||
API_KEY = 12
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = HeartbeatResponse_v2
|
||||
SCHEMA = HeartbeatRequest_v1.SCHEMA
|
||||
|
||||
|
||||
HeartbeatRequest = [HeartbeatRequest_v0, HeartbeatRequest_v1, HeartbeatRequest_v2]
|
||||
HeartbeatResponse = [HeartbeatResponse_v0, HeartbeatResponse_v1, HeartbeatResponse_v2]
|
||||
HeartbeatRequest = [HeartbeatRequest_v0, HeartbeatRequest_v1]
|
||||
HeartbeatResponse = [HeartbeatResponse_v0, HeartbeatResponse_v1]
|
||||
|
||||
|
||||
class LeaveGroupResponse_v0(Response):
|
||||
@@ -264,12 +209,6 @@ class LeaveGroupResponse_v1(Response):
|
||||
)
|
||||
|
||||
|
||||
class LeaveGroupResponse_v2(Response):
|
||||
API_KEY = 13
|
||||
API_VERSION = 2
|
||||
SCHEMA = LeaveGroupResponse_v1.SCHEMA
|
||||
|
||||
|
||||
class LeaveGroupRequest_v0(Request):
|
||||
API_KEY = 13
|
||||
API_VERSION = 0
|
||||
@@ -287,12 +226,5 @@ class LeaveGroupRequest_v1(Request):
|
||||
SCHEMA = LeaveGroupRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class LeaveGroupRequest_v2(Request):
|
||||
API_KEY = 13
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = LeaveGroupResponse_v2
|
||||
SCHEMA = LeaveGroupRequest_v1.SCHEMA
|
||||
|
||||
|
||||
LeaveGroupRequest = [LeaveGroupRequest_v0, LeaveGroupRequest_v1, LeaveGroupRequest_v2]
|
||||
LeaveGroupResponse = [LeaveGroupResponse_v0, LeaveGroupResponse_v1, LeaveGroupResponse_v2]
|
||||
LeaveGroupRequest = [LeaveGroupRequest_v0, LeaveGroupRequest_v1]
|
||||
LeaveGroupResponse = [LeaveGroupResponse_v0, LeaveGroupResponse_v1]
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Int16, Int32, Int64, Schema, String
|
||||
|
||||
|
||||
class InitProducerIdResponse_v0(Response):
|
||||
API_KEY = 22
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('error_code', Int16),
|
||||
('producer_id', Int64),
|
||||
('producer_epoch', Int16),
|
||||
)
|
||||
|
||||
|
||||
class InitProducerIdResponse_v1(Response):
|
||||
API_KEY = 22
|
||||
API_VERSION = 1
|
||||
SCHEMA = InitProducerIdResponse_v0.SCHEMA
|
||||
|
||||
|
||||
class InitProducerIdRequest_v0(Request):
|
||||
API_KEY = 22
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = InitProducerIdResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('transactional_id', String('utf-8')),
|
||||
('transaction_timeout_ms', Int32),
|
||||
)
|
||||
|
||||
|
||||
class InitProducerIdRequest_v1(Request):
|
||||
API_KEY = 22
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = InitProducerIdResponse_v1
|
||||
SCHEMA = InitProducerIdRequest_v0.SCHEMA
|
||||
|
||||
|
||||
InitProducerIdRequest = [
|
||||
InitProducerIdRequest_v0, InitProducerIdRequest_v1,
|
||||
]
|
||||
InitProducerIdResponse = [
|
||||
InitProducerIdResponse_v0, InitProducerIdResponse_v1,
|
||||
]
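InitProducerId serves both plain idempotent producers and transactional ones; the only difference on the wire is whether transactional_id is set. A sketch of the two v0 request payloads as plain tuples matching the schema above (values are examples; the nullability of transactional_id is an assumption based on its idempotence-only use):

# (transactional_id, transaction_timeout_ms)
idempotent_only = (None, 60000)
transactional = ('example-txn-id', 60000)

# The response carries (throttle_time_ms, error_code, producer_id, producer_epoch);
# producer_id and producer_epoch are what the producer stamps on record batches.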
|
||||
@@ -128,42 +128,6 @@ class MetadataResponse_v5(Response):
|
||||
)
|
||||
|
||||
|
||||
class MetadataResponse_v6(Response):
|
||||
"""Metadata Request/Response v6 is the same as v5,
|
||||
but on quota violation, brokers send out responses before throttling."""
|
||||
API_KEY = 3
|
||||
API_VERSION = 6
|
||||
SCHEMA = MetadataResponse_v5.SCHEMA
|
||||
|
||||
|
||||
class MetadataResponse_v7(Response):
|
||||
"""v7 adds per-partition leader_epoch field"""
|
||||
API_KEY = 3
|
||||
API_VERSION = 7
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('brokers', Array(
|
||||
('node_id', Int32),
|
||||
('host', String('utf-8')),
|
||||
('port', Int32),
|
||||
('rack', String('utf-8')))),
|
||||
('cluster_id', String('utf-8')),
|
||||
('controller_id', Int32),
|
||||
('topics', Array(
|
||||
('error_code', Int16),
|
||||
('topic', String('utf-8')),
|
||||
('is_internal', Boolean),
|
||||
('partitions', Array(
|
||||
('error_code', Int16),
|
||||
('partition', Int32),
|
||||
('leader', Int32),
|
||||
('leader_epoch', Int32),
|
||||
('replicas', Array(Int32)),
|
||||
('isr', Array(Int32)),
|
||||
('offline_replicas', Array(Int32))))))
|
||||
)
|
||||
|
||||
|
||||
class MetadataRequest_v0(Request):
|
||||
API_KEY = 3
|
||||
API_VERSION = 0
|
||||
@@ -171,8 +135,7 @@ class MetadataRequest_v0(Request):
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(String('utf-8')))
|
||||
)
|
||||
ALL_TOPICS = [] # Empty Array (len 0) for topics returns all topics
|
||||
NO_TOPICS = [] # v0 does not support a 'no topics' request, so we'll just ask for ALL
|
||||
ALL_TOPICS = None # Empty Array (len 0) for topics returns all topics
|
||||
|
||||
|
||||
class MetadataRequest_v1(Request):
|
||||
@@ -180,8 +143,8 @@ class MetadataRequest_v1(Request):
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = MetadataResponse_v1
|
||||
SCHEMA = MetadataRequest_v0.SCHEMA
|
||||
ALL_TOPICS = None # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = [] # Empty array (len 0) for topics returns no topics
|
||||
ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = None # Empty array (len 0) for topics returns no topics
|
||||
|
||||
|
||||
class MetadataRequest_v2(Request):
|
||||
@@ -189,8 +152,8 @@ class MetadataRequest_v2(Request):
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = MetadataResponse_v2
|
||||
SCHEMA = MetadataRequest_v1.SCHEMA
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = None # Empty array (len 0) for topics returns no topics
|
||||
|
||||
|
||||
class MetadataRequest_v3(Request):
|
||||
@@ -198,8 +161,8 @@ class MetadataRequest_v3(Request):
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = MetadataResponse_v3
|
||||
SCHEMA = MetadataRequest_v1.SCHEMA
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = None # Empty array (len 0) for topics returns no topics
|
||||
|
||||
|
||||
class MetadataRequest_v4(Request):
|
||||
@@ -210,8 +173,8 @@ class MetadataRequest_v4(Request):
|
||||
('topics', Array(String('utf-8'))),
|
||||
('allow_auto_topic_creation', Boolean)
|
||||
)
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = None # Empty array (len 0) for topics returns no topics
|
||||
|
||||
|
||||
class MetadataRequest_v5(Request):
|
||||
@@ -223,35 +186,15 @@ class MetadataRequest_v5(Request):
|
||||
API_VERSION = 5
|
||||
RESPONSE_TYPE = MetadataResponse_v5
|
||||
SCHEMA = MetadataRequest_v4.SCHEMA
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
|
||||
|
||||
class MetadataRequest_v6(Request):
|
||||
API_KEY = 3
|
||||
API_VERSION = 6
|
||||
RESPONSE_TYPE = MetadataResponse_v6
|
||||
SCHEMA = MetadataRequest_v5.SCHEMA
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
|
||||
|
||||
class MetadataRequest_v7(Request):
|
||||
API_KEY = 3
|
||||
API_VERSION = 7
|
||||
RESPONSE_TYPE = MetadataResponse_v7
|
||||
SCHEMA = MetadataRequest_v6.SCHEMA
|
||||
ALL_TOPICS = None
|
||||
NO_TOPICS = []
|
||||
ALL_TOPICS = -1 # Null Array (len -1) for topics returns all topics
|
||||
NO_TOPICS = None # Empty array (len 0) for topics returns no topics
|
||||
|
||||
|
||||
MetadataRequest = [
|
||||
MetadataRequest_v0, MetadataRequest_v1, MetadataRequest_v2,
|
||||
MetadataRequest_v3, MetadataRequest_v4, MetadataRequest_v5,
|
||||
MetadataRequest_v6, MetadataRequest_v7,
|
||||
MetadataRequest_v3, MetadataRequest_v4, MetadataRequest_v5
|
||||
]
|
||||
MetadataResponse = [
|
||||
MetadataResponse_v0, MetadataResponse_v1, MetadataResponse_v2,
|
||||
MetadataResponse_v3, MetadataResponse_v4, MetadataResponse_v5,
|
||||
MetadataResponse_v6, MetadataResponse_v7,
|
||||
MetadataResponse_v3, MetadataResponse_v4, MetadataResponse_v5
|
||||
]
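The ALL_TOPICS / NO_TOPICS sentinels above encode a quirk of the Metadata API: v0 treats an empty topics array as "all topics" and has no way to ask for none, while v1+ uses a null array for "all" and an empty array for "none". A sketch of mapping a caller's intent onto the wire value, following the comments in this hunk:

def metadata_topics_field(requested, api_version):
    # requested: 'all', 'none', or an explicit list of topic names.
    if requested == 'all':
        return [] if api_version == 0 else None   # None encodes a null array
    if requested == 'none':
        if api_version == 0:
            raise ValueError('Metadata v0 cannot request zero topics')
        return []
    return list(requested)

print(metadata_topics_field('all', 0))    # []
print(metadata_topics_field('all', 1))    # None
print(metadata_topics_field(['foo'], 1))  # ['foo']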
|
||||
|
||||
@@ -12,7 +12,7 @@ class OffsetResetStrategy(object):
|
||||
NONE = 0
|
||||
|
||||
|
||||
class ListOffsetsResponse_v0(Response):
|
||||
class OffsetResponse_v0(Response):
|
||||
API_KEY = 2
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
@@ -24,7 +24,7 @@ class ListOffsetsResponse_v0(Response):
|
||||
('offsets', Array(Int64))))))
|
||||
)
|
||||
|
||||
class ListOffsetsResponse_v1(Response):
|
||||
class OffsetResponse_v1(Response):
|
||||
API_KEY = 2
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
@@ -38,7 +38,7 @@ class ListOffsetsResponse_v1(Response):
|
||||
)
|
||||
|
||||
|
||||
class ListOffsetsResponse_v2(Response):
|
||||
class OffsetResponse_v2(Response):
|
||||
API_KEY = 2
|
||||
API_VERSION = 2
|
||||
SCHEMA = Schema(
|
||||
@@ -53,16 +53,16 @@ class ListOffsetsResponse_v2(Response):
|
||||
)
|
||||
|
||||
|
||||
class ListOffsetsResponse_v3(Response):
|
||||
class OffsetResponse_v3(Response):
|
||||
"""
|
||||
on quota violation, brokers send out responses before throttling
|
||||
"""
|
||||
API_KEY = 2
|
||||
API_VERSION = 3
|
||||
SCHEMA = ListOffsetsResponse_v2.SCHEMA
|
||||
SCHEMA = OffsetResponse_v2.SCHEMA
|
||||
|
||||
|
||||
class ListOffsetsResponse_v4(Response):
|
||||
class OffsetResponse_v4(Response):
|
||||
"""
|
||||
Add leader_epoch to response
|
||||
"""
|
||||
@@ -81,19 +81,19 @@ class ListOffsetsResponse_v4(Response):
|
||||
)
|
||||
|
||||
|
||||
class ListOffsetsResponse_v5(Response):
|
||||
class OffsetResponse_v5(Response):
|
||||
"""
|
||||
adds a new error code, OFFSET_NOT_AVAILABLE
|
||||
"""
|
||||
API_KEY = 2
|
||||
API_VERSION = 5
|
||||
SCHEMA = ListOffsetsResponse_v4.SCHEMA
|
||||
SCHEMA = OffsetResponse_v4.SCHEMA
|
||||
|
||||
|
||||
class ListOffsetsRequest_v0(Request):
|
||||
class OffsetRequest_v0(Request):
|
||||
API_KEY = 2
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v0
|
||||
RESPONSE_TYPE = OffsetResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('topics', Array(
|
||||
@@ -107,10 +107,10 @@ class ListOffsetsRequest_v0(Request):
|
||||
'replica_id': -1
|
||||
}
|
||||
|
||||
class ListOffsetsRequest_v1(Request):
|
||||
class OffsetRequest_v1(Request):
|
||||
API_KEY = 2
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v1
|
||||
RESPONSE_TYPE = OffsetResponse_v1
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('topics', Array(
|
||||
@@ -124,10 +124,10 @@ class ListOffsetsRequest_v1(Request):
|
||||
}
|
||||
|
||||
|
||||
class ListOffsetsRequest_v2(Request):
|
||||
class OffsetRequest_v2(Request):
|
||||
API_KEY = 2
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v2
|
||||
RESPONSE_TYPE = OffsetResponse_v2
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('isolation_level', Int8), # <- added isolation_level
|
||||
@@ -142,23 +142,23 @@ class ListOffsetsRequest_v2(Request):
|
||||
}
|
||||
|
||||
|
||||
class ListOffsetsRequest_v3(Request):
|
||||
class OffsetRequest_v3(Request):
|
||||
API_KEY = 2
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v3
|
||||
SCHEMA = ListOffsetsRequest_v2.SCHEMA
|
||||
RESPONSE_TYPE = OffsetResponse_v3
|
||||
SCHEMA = OffsetRequest_v2.SCHEMA
|
||||
DEFAULTS = {
|
||||
'replica_id': -1
|
||||
}
|
||||
|
||||
|
||||
class ListOffsetsRequest_v4(Request):
|
||||
class OffsetRequest_v4(Request):
|
||||
"""
|
||||
Add current_leader_epoch to request
|
||||
"""
|
||||
API_KEY = 2
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v4
|
||||
RESPONSE_TYPE = OffsetResponse_v4
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('isolation_level', Int8), # <- added isolation_level
|
||||
@@ -166,7 +166,7 @@ class ListOffsetsRequest_v4(Request):
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('current_leader_epoch', Int32),
|
||||
('current_leader_epoch', Int64),
|
||||
('timestamp', Int64)))))
|
||||
)
|
||||
DEFAULTS = {
|
||||
@@ -174,21 +174,21 @@ class ListOffsetsRequest_v4(Request):
|
||||
}
|
||||
|
||||
|
||||
class ListOffsetsRequest_v5(Request):
|
||||
class OffsetRequest_v5(Request):
|
||||
API_KEY = 2
|
||||
API_VERSION = 5
|
||||
RESPONSE_TYPE = ListOffsetsResponse_v5
|
||||
SCHEMA = ListOffsetsRequest_v4.SCHEMA
|
||||
RESPONSE_TYPE = OffsetResponse_v5
|
||||
SCHEMA = OffsetRequest_v4.SCHEMA
|
||||
DEFAULTS = {
|
||||
'replica_id': -1
|
||||
}
|
||||
|
||||
|
||||
ListOffsetsRequest = [
|
||||
ListOffsetsRequest_v0, ListOffsetsRequest_v1, ListOffsetsRequest_v2,
|
||||
ListOffsetsRequest_v3, ListOffsetsRequest_v4, ListOffsetsRequest_v5,
|
||||
OffsetRequest = [
|
||||
OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2,
|
||||
OffsetRequest_v3, OffsetRequest_v4, OffsetRequest_v5,
|
||||
]
|
||||
ListOffsetsResponse = [
|
||||
ListOffsetsResponse_v0, ListOffsetsResponse_v1, ListOffsetsResponse_v2,
|
||||
ListOffsetsResponse_v3, ListOffsetsResponse_v4, ListOffsetsResponse_v5,
|
||||
OffsetResponse = [
|
||||
OffsetResponse_v0, OffsetResponse_v1, OffsetResponse_v2,
|
||||
OffsetResponse_v3, OffsetResponse_v4, OffsetResponse_v5,
|
||||
]
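ListOffsets requests (api_key 2) carry a timestamp per partition; by Kafka convention the sentinel -1 asks for the latest (log-end) offset and -2 for the earliest. A sketch of the nested v1 payload shape from the schema above, as plain data; topic and partition numbers are examples:

LATEST, EARLIEST = -1, -2  # well-known timestamp sentinels

# (replica_id, [(topic, [(partition, timestamp)])]); clients always send replica_id -1.
latest_offsets = (-1, [('example-topic', [(0, LATEST), (1, LATEST)])])
earliest_offsets = (-1, [('example-topic', [(0, EARLIEST)])])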
|
||||
@@ -1,140 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from kafka.protocol.api import Request, Response
|
||||
from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Int64, Schema, String, TaggedFields
|
||||
|
||||
|
||||
class OffsetForLeaderEpochResponse_v0(Response):
|
||||
API_KEY = 23
|
||||
API_VERSION = 0
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('error_code', Int16),
|
||||
('partition', Int32),
|
||||
('end_offset', Int64))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochResponse_v1(Response):
|
||||
API_KEY = 23
|
||||
API_VERSION = 1
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('error_code', Int16),
|
||||
('partition', Int32),
|
||||
('leader_epoch', Int32),
|
||||
('end_offset', Int64))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochResponse_v2(Response):
|
||||
API_KEY = 23
|
||||
API_VERSION = 2
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('error_code', Int16),
|
||||
('partition', Int32),
|
||||
('leader_epoch', Int32),
|
||||
('end_offset', Int64))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochResponse_v3(Response):
|
||||
API_KEY = 23
|
||||
API_VERSION = 3
|
||||
SCHEMA = OffsetForLeaderEpochResponse_v2.SCHEMA
|
||||
|
||||
|
||||
class OffsetForLeaderEpochResponse_v4(Response):
|
||||
API_KEY = 23
|
||||
API_VERSION = 4
|
||||
SCHEMA = Schema(
|
||||
('throttle_time_ms', Int32),
|
||||
('topics', CompactArray(
|
||||
('topic', CompactString('utf-8')),
|
||||
('partitions', CompactArray(
|
||||
('error_code', Int16),
|
||||
('partition', Int32),
|
||||
('leader_epoch', Int32),
|
||||
('end_offset', Int64),
|
||||
('tags', TaggedFields))),
|
||||
('tags', TaggedFields))),
|
||||
('tags', TaggedFields))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochRequest_v0(Request):
|
||||
API_KEY = 23
|
||||
API_VERSION = 0
|
||||
RESPONSE_TYPE = OffsetForLeaderEpochResponse_v0
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('leader_epoch', Int32))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochRequest_v1(Request):
|
||||
API_KEY = 23
|
||||
API_VERSION = 1
|
||||
RESPONSE_TYPE = OffsetForLeaderEpochResponse_v1
|
||||
SCHEMA = OffsetForLeaderEpochRequest_v0.SCHEMA
|
||||
|
||||
|
||||
class OffsetForLeaderEpochRequest_v2(Request):
|
||||
API_KEY = 23
|
||||
API_VERSION = 2
|
||||
RESPONSE_TYPE = OffsetForLeaderEpochResponse_v2
|
||||
SCHEMA = Schema(
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('current_leader_epoch', Int32),
|
||||
('leader_epoch', Int32))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochRequest_v3(Request):
|
||||
API_KEY = 23
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = OffsetForLeaderEpochResponse_v3
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('topics', Array(
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('current_leader_epoch', Int32),
|
||||
('leader_epoch', Int32))))))
|
||||
|
||||
|
||||
class OffsetForLeaderEpochRequest_v4(Request):
|
||||
API_KEY = 23
|
||||
API_VERSION = 4
|
||||
RESPONSE_TYPE = OffsetForLeaderEpochResponse_v4
|
||||
SCHEMA = Schema(
|
||||
('replica_id', Int32),
|
||||
('topics', CompactArray(
|
||||
('topic', CompactString('utf-8')),
|
||||
('partitions', CompactArray(
|
||||
('partition', Int32),
|
||||
('current_leader_epoch', Int32),
|
||||
('leader_epoch', Int32),
|
||||
('tags', TaggedFields))),
|
||||
('tags', TaggedFields))),
|
||||
('tags', TaggedFields))
|
||||
|
||||
OffsetForLeaderEpochRequest = [
|
||||
OffsetForLeaderEpochRequest_v0, OffsetForLeaderEpochRequest_v1,
|
||||
OffsetForLeaderEpochRequest_v2, OffsetForLeaderEpochRequest_v3,
|
||||
OffsetForLeaderEpochRequest_v4,
|
||||
]
|
||||
OffsetForLeaderEpochResponse = [
|
||||
OffsetForLeaderEpochResponse_v0, OffsetForLeaderEpochResponse_v1,
|
||||
OffsetForLeaderEpochResponse_v2, OffsetForLeaderEpochResponse_v3,
|
||||
OffsetForLeaderEpochResponse_v4,
|
||||
]
|
||||
@@ -4,9 +4,10 @@ import collections
|
||||
import logging
|
||||
|
||||
import kafka.errors as Errors
|
||||
from kafka.protocol.find_coordinator import FindCoordinatorResponse
|
||||
from kafka.protocol.api import RequestHeader
|
||||
from kafka.protocol.commit import GroupCoordinatorResponse
|
||||
from kafka.protocol.frame import KafkaBytes
|
||||
from kafka.protocol.types import Int32, TaggedFields
|
||||
from kafka.protocol.types import Int32
|
||||
from kafka.version import __version__
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -58,8 +59,9 @@ class KafkaProtocol(object):
|
||||
log.debug('Sending request %s', request)
|
||||
if correlation_id is None:
|
||||
correlation_id = self._next_correlation_id()
|
||||
|
||||
header = request.build_header(correlation_id=correlation_id, client_id=self._client_id)
|
||||
header = RequestHeader(request,
|
||||
correlation_id=correlation_id,
|
||||
client_id=self._client_id)
|
||||
message = b''.join([header.encode(), request.encode()])
|
||||
size = Int32.encode(len(message))
|
||||
data = size + message
|
||||
@@ -133,17 +135,21 @@ class KafkaProtocol(object):
|
||||
return responses
|
||||
|
||||
def _process_response(self, read_buffer):
|
||||
if not self.in_flight_requests:
|
||||
raise Errors.CorrelationIdError('No in-flight-request found for server response')
|
||||
(correlation_id, request) = self.in_flight_requests.popleft()
|
||||
response_type = request.RESPONSE_TYPE
|
||||
response_header = response_type.parse_header(read_buffer)
|
||||
recv_correlation_id = response_header.correlation_id
|
||||
recv_correlation_id = Int32.decode(read_buffer)
|
||||
log.debug('Received correlation id: %d', recv_correlation_id)
|
||||
|
||||
if not self.in_flight_requests:
|
||||
raise Errors.CorrelationIdError(
|
||||
'No in-flight-request found for server response'
|
||||
' with correlation ID %d'
|
||||
% (recv_correlation_id,))
|
||||
|
||||
(correlation_id, request) = self.in_flight_requests.popleft()
|
||||
|
||||
# 0.8.2 quirk
|
||||
if (recv_correlation_id == 0 and
|
||||
correlation_id != 0 and
|
||||
response_type is FindCoordinatorResponse[0] and
|
||||
request.RESPONSE_TYPE is GroupCoordinatorResponse[0] and
|
||||
(self._api_version == (0, 8, 2) or self._api_version is None)):
|
||||
log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
|
||||
' Correlation ID does not match request. This'
|
||||
@@ -157,15 +163,15 @@ class KafkaProtocol(object):
|
||||
% (correlation_id, recv_correlation_id))
|
||||
|
||||
# decode response
|
||||
log.debug('Processing response %s', response_type.__name__)
|
||||
log.debug('Processing response %s', request.RESPONSE_TYPE.__name__)
|
||||
try:
|
||||
response = response_type.decode(read_buffer)
|
||||
response = request.RESPONSE_TYPE.decode(read_buffer)
|
||||
except ValueError:
|
||||
read_buffer.seek(0)
|
||||
buf = read_buffer.read()
|
||||
log.error('Response %d [ResponseType: %s Request: %s]:'
|
||||
' Unable to decode %d-byte buffer: %r',
|
||||
correlation_id, response_type,
|
||||
correlation_id, request.RESPONSE_TYPE,
|
||||
request, len(buf), buf)
|
||||
raise Errors.KafkaProtocolError('Unable to decode response')
|
||||
|
||||
|
||||
@@ -47,7 +47,6 @@ class ProduceResponse_v2(Response):
|
||||
|
||||
|
||||
class ProduceResponse_v3(Response):
|
||||
# Adds support for message format v2
|
||||
API_KEY = 0
|
||||
API_VERSION = 3
|
||||
SCHEMA = ProduceResponse_v2.SCHEMA
|
||||
@@ -142,7 +141,7 @@ class ProduceRequest_v0(ProduceRequest):
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('records', Bytes)))))
|
||||
('messages', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
@@ -159,7 +158,6 @@ class ProduceRequest_v2(ProduceRequest):
|
||||
|
||||
|
||||
class ProduceRequest_v3(ProduceRequest):
|
||||
# Adds support for message format v2
|
||||
API_VERSION = 3
|
||||
RESPONSE_TYPE = ProduceResponse_v3
|
||||
SCHEMA = Schema(
|
||||
@@ -170,7 +168,7 @@ class ProduceRequest_v3(ProduceRequest):
|
||||
('topic', String('utf-8')),
|
||||
('partitions', Array(
|
||||
('partition', Int32),
|
||||
('records', Bytes)))))
|
||||
('messages', Bytes)))))
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,42 +0,0 @@
from __future__ import absolute_import

from kafka.protocol.api import Request, Response
from kafka.protocol.types import Bytes, Int16, Int64, Schema, String


class SaslAuthenticateResponse_v0(Response):
API_KEY = 36
API_VERSION = 0
SCHEMA = Schema(
('error_code', Int16),
('error_message', String('utf-8')),
('auth_bytes', Bytes))


class SaslAuthenticateResponse_v1(Response):
API_KEY = 36
API_VERSION = 1
SCHEMA = Schema(
('error_code', Int16),
('error_message', String('utf-8')),
('auth_bytes', Bytes),
('session_lifetime_ms', Int64))


class SaslAuthenticateRequest_v0(Request):
API_KEY = 36
API_VERSION = 0
RESPONSE_TYPE = SaslAuthenticateResponse_v0
SCHEMA = Schema(
('auth_bytes', Bytes))


class SaslAuthenticateRequest_v1(Request):
API_KEY = 36
API_VERSION = 1
RESPONSE_TYPE = SaslAuthenticateResponse_v1
SCHEMA = SaslAuthenticateRequest_v0.SCHEMA


SaslAuthenticateRequest = [SaslAuthenticateRequest_v0, SaslAuthenticateRequest_v1]
SaslAuthenticateResponse = [SaslAuthenticateResponse_v0, SaslAuthenticateResponse_v1]
@@ -1,39 +0,0 @@
from __future__ import absolute_import

from kafka.protocol.api import Request, Response
from kafka.protocol.types import Array, Int16, Schema, String


class SaslHandshakeResponse_v0(Response):
API_KEY = 17
API_VERSION = 0
SCHEMA = Schema(
('error_code', Int16),
('enabled_mechanisms', Array(String('utf-8')))
)


class SaslHandshakeResponse_v1(Response):
API_KEY = 17
API_VERSION = 1
SCHEMA = SaslHandshakeResponse_v0.SCHEMA


class SaslHandshakeRequest_v0(Request):
API_KEY = 17
API_VERSION = 0
RESPONSE_TYPE = SaslHandshakeResponse_v0
SCHEMA = Schema(
('mechanism', String('utf-8'))
)


class SaslHandshakeRequest_v1(Request):
API_KEY = 17
API_VERSION = 1
RESPONSE_TYPE = SaslHandshakeResponse_v1
SCHEMA = SaslHandshakeRequest_v0.SCHEMA


SaslHandshakeRequest = [SaslHandshakeRequest_v0, SaslHandshakeRequest_v1]
SaslHandshakeResponse = [SaslHandshakeResponse_v0, SaslHandshakeResponse_v1]
@@ -1,78 +0,0 @@
from __future__ import absolute_import

from kafka.protocol.api import Request, Response
from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String


class TxnOffsetCommitResponse_v0(Response):
API_KEY = 28
API_VERSION = 0
SCHEMA = Schema(
('throttle_time_ms', Int32),
('topics', Array(
('topic', String('utf-8')),
('partitions', Array(
('partition', Int32),
('error_code', Int16))))))


class TxnOffsetCommitResponse_v1(Response):
API_KEY = 28
API_VERSION = 1
SCHEMA = TxnOffsetCommitResponse_v0.SCHEMA


class TxnOffsetCommitResponse_v2(Response):
API_KEY = 28
API_VERSION = 2
SCHEMA = TxnOffsetCommitResponse_v1.SCHEMA


class TxnOffsetCommitRequest_v0(Request):
API_KEY = 28
API_VERSION = 0
RESPONSE_TYPE = TxnOffsetCommitResponse_v0
SCHEMA = Schema(
('transactional_id', String('utf-8')),
('group_id', String('utf-8')),
('producer_id', Int64),
('producer_epoch', Int16),
('topics', Array(
('topic', String('utf-8')),
('partitions', Array(
('partition', Int32),
('offset', Int64),
('metadata', String('utf-8')))))))


class TxnOffsetCommitRequest_v1(Request):
API_KEY = 28
API_VERSION = 1
RESPONSE_TYPE = TxnOffsetCommitResponse_v1
SCHEMA = TxnOffsetCommitRequest_v0.SCHEMA


class TxnOffsetCommitRequest_v2(Request):
API_KEY = 28
API_VERSION = 2
RESPONSE_TYPE = TxnOffsetCommitResponse_v2
SCHEMA = Schema(
('transactional_id', String('utf-8')),
('group_id', String('utf-8')),
('producer_id', Int64),
('producer_epoch', Int16),
('topics', Array(
('topic', String('utf-8')),
('partitions', Array(
('partition', Int32),
('offset', Int64),
('leader_epoch', Int32),
('metadata', String('utf-8')))))))


TxnOffsetCommitRequest = [
TxnOffsetCommitRequest_v0, TxnOffsetCommitRequest_v1, TxnOffsetCommitRequest_v2,
]
TxnOffsetCommitResponse = [
TxnOffsetCommitResponse_v0, TxnOffsetCommitResponse_v1, TxnOffsetCommitResponse_v2,
]
@@ -77,19 +77,6 @@ class Int64(AbstractType):
return _unpack(cls._unpack, data.read(8))


class Float64(AbstractType):
_pack = struct.Struct('>d').pack
_unpack = struct.Struct('>d').unpack

@classmethod
def encode(cls, value):
return _pack(cls._pack, value)

@classmethod
def decode(cls, data):
return _unpack(cls._unpack, data.read(8))


class String(AbstractType):
def __init__(self, encoding='utf-8'):
self.encoding = encoding
@@ -194,10 +181,9 @@ class Array(AbstractType):
def encode(self, items):
if items is None:
return Int32.encode(-1)
encoded_items = [self.array_of.encode(item) for item in items]
return b''.join(
[Int32.encode(len(encoded_items))] +
encoded_items
[Int32.encode(len(items))] +
[self.array_of.encode(item) for item in items]
)

def decode(self, data):
@@ -210,156 +196,3 @@ class Array(AbstractType):
|
||||
if list_of_items is None:
|
||||
return 'NULL'
|
||||
return '[' + ', '.join([self.array_of.repr(item) for item in list_of_items]) + ']'
|
||||
|
||||
|
||||
class UnsignedVarInt32(AbstractType):
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
value, i = 0, 0
|
||||
while True:
|
||||
b, = struct.unpack('B', data.read(1))
|
||||
if not (b & 0x80):
|
||||
break
|
||||
value |= (b & 0x7f) << i
|
||||
i += 7
|
||||
if i > 28:
|
||||
raise ValueError('Invalid value {}'.format(value))
|
||||
value |= b << i
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def encode(cls, value):
|
||||
value &= 0xffffffff
|
||||
ret = b''
|
||||
while (value & 0xffffff80) != 0:
|
||||
b = (value & 0x7f) | 0x80
|
||||
ret += struct.pack('B', b)
|
||||
value >>= 7
|
||||
ret += struct.pack('B', value)
|
||||
return ret
|
||||
|
||||
|
||||
class VarInt32(AbstractType):
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
value = UnsignedVarInt32.decode(data)
|
||||
return (value >> 1) ^ -(value & 1)
|
||||
|
||||
@classmethod
|
||||
def encode(cls, value):
|
||||
# bring it in line with the java binary repr
|
||||
value &= 0xffffffff
|
||||
return UnsignedVarInt32.encode((value << 1) ^ (value >> 31))
|
||||
|
||||
|
||||
class VarInt64(AbstractType):
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
value, i = 0, 0
|
||||
while True:
|
||||
b = data.read(1)
|
||||
if not (b & 0x80):
|
||||
break
|
||||
value |= (b & 0x7f) << i
|
||||
i += 7
|
||||
if i > 63:
|
||||
raise ValueError('Invalid value {}'.format(value))
|
||||
value |= b << i
|
||||
return (value >> 1) ^ -(value & 1)
|
||||
|
||||
@classmethod
|
||||
def encode(cls, value):
|
||||
# bring it in line with the java binary repr
|
||||
value &= 0xffffffffffffffff
|
||||
v = (value << 1) ^ (value >> 63)
|
||||
ret = b''
|
||||
while (v & 0xffffffffffffff80) != 0:
|
||||
b = (value & 0x7f) | 0x80
|
||||
ret += struct.pack('B', b)
|
||||
v >>= 7
|
||||
ret += struct.pack('B', v)
|
||||
return ret
|
||||
|
||||
|
||||
class CompactString(String):
|
||||
def decode(self, data):
|
||||
length = UnsignedVarInt32.decode(data) - 1
|
||||
if length < 0:
|
||||
return None
|
||||
value = data.read(length)
|
||||
if len(value) != length:
|
||||
raise ValueError('Buffer underrun decoding string')
|
||||
return value.decode(self.encoding)
|
||||
|
||||
def encode(self, value):
|
||||
if value is None:
|
||||
return UnsignedVarInt32.encode(0)
|
||||
value = str(value).encode(self.encoding)
|
||||
return UnsignedVarInt32.encode(len(value) + 1) + value
|
||||
|
||||
|
||||
class TaggedFields(AbstractType):
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
num_fields = UnsignedVarInt32.decode(data)
|
||||
ret = {}
|
||||
if not num_fields:
|
||||
return ret
|
||||
prev_tag = -1
|
||||
for i in range(num_fields):
|
||||
tag = UnsignedVarInt32.decode(data)
|
||||
if tag <= prev_tag:
|
||||
raise ValueError('Invalid or out-of-order tag {}'.format(tag))
|
||||
prev_tag = tag
|
||||
size = UnsignedVarInt32.decode(data)
|
||||
val = data.read(size)
|
||||
ret[tag] = val
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def encode(cls, value):
|
||||
ret = UnsignedVarInt32.encode(len(value))
|
||||
for k, v in value.items():
|
||||
# do we allow for other data types ?? It could get complicated really fast
|
||||
assert isinstance(v, bytes), 'Value {} is not a byte array'.format(v)
|
||||
assert isinstance(k, int) and k > 0, 'Key {} is not a positive integer'.format(k)
|
||||
ret += UnsignedVarInt32.encode(k)
|
||||
ret += v
|
||||
return ret
|
||||
|
||||
|
||||
class CompactBytes(AbstractType):
|
||||
@classmethod
|
||||
def decode(cls, data):
|
||||
length = UnsignedVarInt32.decode(data) - 1
|
||||
if length < 0:
|
||||
return None
|
||||
value = data.read(length)
|
||||
if len(value) != length:
|
||||
raise ValueError('Buffer underrun decoding Bytes')
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def encode(cls, value):
|
||||
if value is None:
|
||||
return UnsignedVarInt32.encode(0)
|
||||
else:
|
||||
return UnsignedVarInt32.encode(len(value) + 1) + value
|
||||
|
||||
|
||||
class CompactArray(Array):
|
||||
|
||||
def encode(self, items):
|
||||
if items is None:
|
||||
return UnsignedVarInt32.encode(0)
|
||||
return b''.join(
|
||||
[UnsignedVarInt32.encode(len(items) + 1)] +
|
||||
[self.array_of.encode(item) for item in items]
|
||||
)
|
||||
|
||||
def decode(self, data):
|
||||
length = UnsignedVarInt32.decode(data) - 1
|
||||
if length == -1:
|
||||
return None
|
||||
return [self.array_of.decode(data) for _ in range(length)]
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ def crc_update(crc, data):
Returns:
32-bit updated CRC-32C as long.
"""
if not isinstance(data, array.array) or data.itemsize != 1:
if type(data) != array.array or data.itemsize != 1:
buf = array.array("B", data)
else:
buf = data

@@ -1,19 +1,11 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class ABCRecord(object):
|
||||
__metaclass__ = abc.ABCMeta
|
||||
__slots__ = ()
|
||||
|
||||
@abc.abstractproperty
|
||||
def size_in_bytes(self):
|
||||
""" Number of total bytes in record
|
||||
"""
|
||||
|
||||
@abc.abstractproperty
|
||||
def offset(self):
|
||||
""" Absolute offset of record
|
||||
@@ -45,11 +37,6 @@ class ABCRecord(object):
|
||||
be the checksum for v0 and v1 and None for v2 and above.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def validate_crc(self):
|
||||
""" Return True if v0/v1 record matches checksum. noop/True for v2 records
|
||||
"""
|
||||
|
||||
@abc.abstractproperty
|
||||
def headers(self):
|
||||
""" If supported by version list of key-value tuples, or empty list if
|
||||
@@ -57,8 +44,8 @@ class ABCRecord(object):
|
||||
"""
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class ABCRecordBatchBuilder(object):
|
||||
__metaclass__ = abc.ABCMeta
|
||||
__slots__ = ()
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -97,11 +84,11 @@ class ABCRecordBatchBuilder(object):
|
||||
"""
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class ABCRecordBatch(object):
|
||||
""" For v2 encapsulates a RecordBatch, for v0/v1 a single (maybe
|
||||
""" For v2 incapsulates a RecordBatch, for v0/v1 a single (maybe
|
||||
compressed) message.
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
__slots__ = ()
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -110,24 +97,9 @@ class ABCRecordBatch(object):
|
||||
if needed.
|
||||
"""
|
||||
|
||||
@abc.abstractproperty
|
||||
def base_offset(self):
|
||||
""" Return base offset for batch
|
||||
"""
|
||||
|
||||
@abc.abstractproperty
|
||||
def size_in_bytes(self):
|
||||
""" Return size of batch in bytes (includes header overhead)
|
||||
"""
|
||||
|
||||
@abc.abstractproperty
|
||||
def magic(self):
|
||||
""" Return magic value (0, 1, 2) for batch.
|
||||
"""
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class ABCRecords(object):
|
||||
__metaclass__ = abc.ABCMeta
|
||||
__slots__ = ()
|
||||
|
||||
@abc.abstractmethod
|
||||
|
||||
@@ -60,7 +60,7 @@ from kafka.record.abc import ABCRecord, ABCRecordBatch, ABCRecordBatchBuilder
|
||||
from kafka.record.util import (
|
||||
decode_varint, encode_varint, calc_crc32c, size_of_varint
|
||||
)
|
||||
from kafka.errors import CorruptRecordError, UnsupportedCodecError
|
||||
from kafka.errors import CorruptRecordException, UnsupportedCodecError
|
||||
from kafka.codec import (
|
||||
gzip_encode, snappy_encode, lz4_encode, zstd_encode,
|
||||
gzip_decode, snappy_decode, lz4_decode, zstd_decode
|
||||
@@ -104,9 +104,6 @@ class DefaultRecordBase(object):
|
||||
|
||||
LOG_APPEND_TIME = 1
|
||||
CREATE_TIME = 0
|
||||
NO_PRODUCER_ID = -1
|
||||
NO_SEQUENCE = -1
|
||||
MAX_INT = 2147483647
|
||||
|
||||
def _assert_has_codec(self, compression_type):
|
||||
if compression_type == self.CODEC_GZIP:
|
||||
@@ -117,8 +114,6 @@ class DefaultRecordBase(object):
|
||||
checker, name = codecs.has_lz4, "lz4"
|
||||
elif compression_type == self.CODEC_ZSTD:
|
||||
checker, name = codecs.has_zstd, "zstd"
|
||||
else:
|
||||
raise UnsupportedCodecError("Unrecognized compression type: %s" % (compression_type,))
|
||||
if not checker():
|
||||
raise UnsupportedCodecError(
|
||||
"Libraries for {} compression codec not found".format(name))
|
||||
@@ -141,14 +136,6 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
def base_offset(self):
|
||||
return self._header_data[0]
|
||||
|
||||
@property
|
||||
def size_in_bytes(self):
|
||||
return self._header_data[1] + self.AFTER_LEN_OFFSET
|
||||
|
||||
@property
|
||||
def leader_epoch(self):
|
||||
return self._header_data[2]
|
||||
|
||||
@property
|
||||
def magic(self):
|
||||
return self._header_data[3]
|
||||
@@ -165,14 +152,6 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
def last_offset_delta(self):
|
||||
return self._header_data[6]
|
||||
|
||||
@property
|
||||
def last_offset(self):
|
||||
return self.base_offset + self.last_offset_delta
|
||||
|
||||
@property
|
||||
def next_offset(self):
|
||||
return self.last_offset + 1
|
||||
|
||||
@property
|
||||
def compression_type(self):
|
||||
return self.attributes & self.CODEC_MASK
|
||||
@@ -197,40 +176,6 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
def max_timestamp(self):
|
||||
return self._header_data[8]
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self._header_data[9]
|
||||
|
||||
def has_producer_id(self):
|
||||
return self.producer_id > self.NO_PRODUCER_ID
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self._header_data[10]
|
||||
|
||||
@property
|
||||
def base_sequence(self):
|
||||
return self._header_data[11]
|
||||
|
||||
@property
|
||||
def has_sequence(self):
|
||||
return self._header_data[11] != -1 # NO_SEQUENCE
|
||||
|
||||
@property
|
||||
def last_sequence(self):
|
||||
if self.base_sequence == self.NO_SEQUENCE:
|
||||
return self.NO_SEQUENCE
|
||||
return self._increment_sequence(self.base_sequence, self.last_offset_delta)
|
||||
|
||||
def _increment_sequence(self, base, increment):
|
||||
if base > (self.MAX_INT - increment):
|
||||
return increment - (self.MAX_INT - base) - 1
|
||||
return base + increment
|
||||
|
||||
@property
|
||||
def records_count(self):
|
||||
return self._header_data[12]
|
||||
|
||||
def _maybe_uncompress(self):
|
||||
if not self._decompressed:
|
||||
compression_type = self.compression_type
|
||||
@@ -294,14 +239,14 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
|
||||
header_count, pos = decode_varint(buffer, pos)
|
||||
if header_count < 0:
|
||||
raise CorruptRecordError("Found invalid number of record "
|
||||
raise CorruptRecordException("Found invalid number of record "
|
||||
"headers {}".format(header_count))
|
||||
headers = []
|
||||
while header_count:
|
||||
# Header key is of type String, that can't be None
|
||||
h_key_len, pos = decode_varint(buffer, pos)
|
||||
if h_key_len < 0:
|
||||
raise CorruptRecordError(
|
||||
raise CorruptRecordException(
|
||||
"Invalid negative header key size {}".format(h_key_len))
|
||||
h_key = buffer[pos: pos + h_key_len].decode("utf-8")
|
||||
pos += h_key_len
|
||||
@@ -319,17 +264,13 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
|
||||
# validate whether we have read all header bytes in the current record
|
||||
if pos - start_pos != length:
|
||||
raise CorruptRecordError(
|
||||
raise CorruptRecordException(
|
||||
"Invalid record size: expected to read {} bytes in record "
|
||||
"payload, but instead read {}".format(length, pos - start_pos))
|
||||
self._pos = pos
|
||||
|
||||
if self.is_control_batch:
|
||||
return ControlRecord(
|
||||
length, offset, timestamp, self.timestamp_type, key, value, headers)
|
||||
else:
|
||||
return DefaultRecord(
|
||||
length, offset, timestamp, self.timestamp_type, key, value, headers)
|
||||
return DefaultRecord(
|
||||
offset, timestamp, self.timestamp_type, key, value, headers)
|
||||
|
||||
def __iter__(self):
|
||||
self._maybe_uncompress()
|
||||
@@ -338,14 +279,14 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
def __next__(self):
|
||||
if self._next_record_index >= self._num_records:
|
||||
if self._pos != len(self._buffer):
|
||||
raise CorruptRecordError(
|
||||
raise CorruptRecordException(
|
||||
"{} unconsumed bytes after all records consumed".format(
|
||||
len(self._buffer) - self._pos))
|
||||
raise StopIteration
|
||||
try:
|
||||
msg = self._read_msg()
|
||||
except (ValueError, IndexError) as err:
|
||||
raise CorruptRecordError(
|
||||
raise CorruptRecordException(
|
||||
"Found invalid record structure: {!r}".format(err))
|
||||
else:
|
||||
self._next_record_index += 1
|
||||
@@ -362,25 +303,13 @@ class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch):
|
||||
verify_crc = calc_crc32c(data_view.tobytes())
|
||||
return crc == verify_crc
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
"DefaultRecordBatch(magic={}, base_offset={}, last_offset_delta={},"
|
||||
" first_timestamp={}, max_timestamp={},"
|
||||
" is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={},"
|
||||
" records_count={})".format(
|
||||
self.magic, self.base_offset, self.last_offset_delta,
|
||||
self.first_timestamp, self.max_timestamp,
|
||||
self.is_transactional, self.producer_id, self.producer_epoch, self.base_sequence,
|
||||
self.records_count))
|
||||
|
||||
|
||||
class DefaultRecord(ABCRecord):
|
||||
|
||||
__slots__ = ("_size_in_bytes", "_offset", "_timestamp", "_timestamp_type", "_key", "_value",
|
||||
__slots__ = ("_offset", "_timestamp", "_timestamp_type", "_key", "_value",
|
||||
"_headers")
|
||||
|
||||
def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers):
|
||||
self._size_in_bytes = size_in_bytes
|
||||
def __init__(self, offset, timestamp, timestamp_type, key, value, headers):
|
||||
self._offset = offset
|
||||
self._timestamp = timestamp
|
||||
self._timestamp_type = timestamp_type
|
||||
@@ -388,10 +317,6 @@ class DefaultRecord(ABCRecord):
|
||||
self._value = value
|
||||
self._headers = headers
|
||||
|
||||
@property
|
||||
def size_in_bytes(self):
|
||||
return self._size_in_bytes
|
||||
|
||||
@property
|
||||
def offset(self):
|
||||
return self._offset
|
||||
@@ -428,9 +353,6 @@ class DefaultRecord(ABCRecord):
|
||||
def checksum(self):
|
||||
return None
|
||||
|
||||
def validate_crc(self):
|
||||
return True
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
"DefaultRecord(offset={!r}, timestamp={!r}, timestamp_type={!r},"
|
||||
@@ -440,45 +362,6 @@ class DefaultRecord(ABCRecord):
|
||||
)
|
||||
|
||||
|
||||
class ControlRecord(DefaultRecord):
|
||||
__slots__ = ("_size_in_bytes", "_offset", "_timestamp", "_timestamp_type", "_key", "_value",
|
||||
"_headers", "_version", "_type")
|
||||
|
||||
KEY_STRUCT = struct.Struct(
|
||||
">h" # Current Version => Int16
|
||||
"h" # Type => Int16 (0 indicates an abort marker, 1 indicates a commit)
|
||||
)
|
||||
|
||||
def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers):
|
||||
super(ControlRecord, self).__init__(size_in_bytes, offset, timestamp, timestamp_type, key, value, headers)
|
||||
(self._version, self._type) = self.KEY_STRUCT.unpack(self._key)
|
||||
|
||||
# see https://kafka.apache.org/documentation/#controlbatch
|
||||
@property
|
||||
def version(self):
|
||||
return self._version
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self._type
|
||||
|
||||
@property
|
||||
def abort(self):
|
||||
return self._type == 0
|
||||
|
||||
@property
|
||||
def commit(self):
|
||||
return self._type == 1
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
"ControlRecord(offset={!r}, timestamp={!r}, timestamp_type={!r},"
|
||||
" version={!r}, type={!r} <{!s}>)".format(
|
||||
self._offset, self._timestamp, self._timestamp_type,
|
||||
self._version, self._type, "abort" if self.abort else "commit")
|
||||
)
|
||||
|
||||
|
||||
class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
|
||||
|
||||
# excluding key, value and headers:
|
||||
@@ -510,23 +393,6 @@ class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
|
||||
|
||||
self._buffer = bytearray(self.HEADER_STRUCT.size)
|
||||
|
||||
def set_producer_state(self, producer_id, producer_epoch, base_sequence, is_transactional):
|
||||
assert not is_transactional or producer_id != -1, "Cannot write transactional messages without a valid producer ID"
|
||||
assert producer_id == -1 or producer_epoch != -1, "Invalid negative producer epoch"
|
||||
assert producer_id == -1 or base_sequence != -1, "Invalid negative sequence number"
|
||||
self._producer_id = producer_id
|
||||
self._producer_epoch = producer_epoch
|
||||
self._base_sequence = base_sequence
|
||||
self._is_transactional = is_transactional
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self._producer_id
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self._producer_epoch
|
||||
|
||||
def _get_attributes(self, include_compression_type=True):
|
||||
attrs = 0
|
||||
if include_compression_type:
|
||||
@@ -635,8 +501,8 @@ class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
|
||||
0, # CRC will be set below, as we need a filled buffer for it
|
||||
self._get_attributes(use_compression_type),
|
||||
self._last_offset,
|
||||
self._first_timestamp or 0,
|
||||
self._max_timestamp or 0,
|
||||
self._first_timestamp,
|
||||
self._max_timestamp,
|
||||
self._producer_id,
|
||||
self._producer_epoch,
|
||||
self._base_sequence,
|
||||
@@ -681,15 +547,14 @@ class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
|
||||
"""
|
||||
return len(self._buffer)
|
||||
|
||||
@classmethod
|
||||
def header_size_in_bytes(self):
|
||||
return self.HEADER_STRUCT.size
|
||||
|
||||
@classmethod
|
||||
def size_in_bytes(self, offset_delta, timestamp_delta, key, value, headers):
|
||||
def size_in_bytes(self, offset, timestamp, key, value, headers):
|
||||
if self._first_timestamp is not None:
|
||||
timestamp_delta = timestamp - self._first_timestamp
|
||||
else:
|
||||
timestamp_delta = 0
|
||||
size_of_body = (
|
||||
1 + # Attrs
|
||||
size_of_varint(offset_delta) +
|
||||
size_of_varint(offset) +
|
||||
size_of_varint(timestamp_delta) +
|
||||
self.size_of(key, value, headers)
|
||||
)
|
||||
@@ -732,17 +597,6 @@ class DefaultRecordBatchBuilder(DefaultRecordBase, ABCRecordBatchBuilder):
|
||||
cls.size_of(key, value, headers)
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
"DefaultRecordBatchBuilder(magic={}, base_offset={}, last_offset_delta={},"
|
||||
" first_timestamp={}, max_timestamp={},"
|
||||
" is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={},"
|
||||
" records_count={})".format(
|
||||
self._magic, 0, self._last_offset,
|
||||
self._first_timestamp or 0, self._max_timestamp or 0,
|
||||
self._is_transactional, self._producer_id, self._producer_epoch, self._base_sequence,
|
||||
self._num_records))
|
||||
|
||||
|
||||
class DefaultRecordMetadata(object):
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ from kafka.codec import (
|
||||
gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka,
|
||||
)
|
||||
import kafka.codec as codecs
|
||||
from kafka.errors import CorruptRecordError, UnsupportedCodecError
|
||||
from kafka.errors import CorruptRecordException, UnsupportedCodecError
|
||||
|
||||
|
||||
class LegacyRecordBase(object):
|
||||
@@ -129,7 +129,7 @@ class LegacyRecordBase(object):
|
||||
|
||||
class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
|
||||
__slots__ = ("_buffer", "_magic", "_offset", "_length", "_crc", "_timestamp",
|
||||
__slots__ = ("_buffer", "_magic", "_offset", "_crc", "_timestamp",
|
||||
"_attributes", "_decompressed")
|
||||
|
||||
def __init__(self, buffer, magic):
|
||||
@@ -141,20 +141,11 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
assert magic == magic_
|
||||
|
||||
self._offset = offset
|
||||
self._length = length
|
||||
self._crc = crc
|
||||
self._timestamp = timestamp
|
||||
self._attributes = attrs
|
||||
self._decompressed = False
|
||||
|
||||
@property
|
||||
def base_offset(self):
|
||||
return self._offset
|
||||
|
||||
@property
|
||||
def size_in_bytes(self):
|
||||
return self._length + self.LOG_OVERHEAD
|
||||
|
||||
@property
|
||||
def timestamp_type(self):
|
||||
"""0 for CreateTime; 1 for LogAppendTime; None if unsupported.
|
||||
@@ -173,10 +164,6 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
def compression_type(self):
|
||||
return self._attributes & self.CODEC_MASK
|
||||
|
||||
@property
|
||||
def magic(self):
|
||||
return self._magic
|
||||
|
||||
def validate_crc(self):
|
||||
crc = calc_crc32(self._buffer[self.MAGIC_OFFSET:])
|
||||
return self._crc == crc
|
||||
@@ -191,7 +178,7 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
value_size = struct.unpack_from(">i", self._buffer, pos)[0]
|
||||
pos += self.VALUE_LENGTH
|
||||
if value_size == -1:
|
||||
raise CorruptRecordError("Value of compressed message is None")
|
||||
raise CorruptRecordException("Value of compressed message is None")
|
||||
else:
|
||||
data = self._buffer[pos:pos + value_size]
|
||||
|
||||
@@ -245,9 +232,6 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
value = self._buffer[pos:pos + value_size].tobytes()
|
||||
return key, value
|
||||
|
||||
def _crc_bytes(self, msg_pos, length):
|
||||
return self._buffer[msg_pos + self.MAGIC_OFFSET:msg_pos + self.LOG_OVERHEAD + length]
|
||||
|
||||
def __iter__(self):
|
||||
if self._magic == 1:
|
||||
key_offset = self.KEY_OFFSET_V1
|
||||
@@ -271,7 +255,7 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
absolute_base_offset = -1
|
||||
|
||||
for header, msg_pos in headers:
|
||||
offset, length, crc, _, attrs, timestamp = header
|
||||
offset, _, crc, _, attrs, timestamp = header
|
||||
# There should only ever be a single layer of compression
|
||||
assert not attrs & self.CODEC_MASK, (
|
||||
'MessageSet at offset %d appears double-compressed. This '
|
||||
@@ -279,7 +263,7 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
|
||||
# When magic value is greater than 0, the timestamp
|
||||
# of a compressed message depends on the
|
||||
# timestamp type of the wrapper message:
|
||||
# typestamp type of the wrapper message:
|
||||
if timestamp_type == self.LOG_APPEND_TIME:
|
||||
timestamp = self._timestamp
|
||||
|
||||
@@ -287,36 +271,28 @@ class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase):
|
||||
offset += absolute_base_offset
|
||||
|
||||
key, value = self._read_key_value(msg_pos + key_offset)
|
||||
crc_bytes = self._crc_bytes(msg_pos, length)
|
||||
yield LegacyRecord(
|
||||
self._magic, offset, timestamp, timestamp_type,
|
||||
key, value, crc, crc_bytes)
|
||||
offset, timestamp, timestamp_type,
|
||||
key, value, crc)
|
||||
else:
|
||||
key, value = self._read_key_value(key_offset)
|
||||
crc_bytes = self._crc_bytes(0, len(self._buffer) - self.LOG_OVERHEAD)
|
||||
yield LegacyRecord(
|
||||
self._magic, self._offset, self._timestamp, timestamp_type,
|
||||
key, value, self._crc, crc_bytes)
|
||||
self._offset, self._timestamp, timestamp_type,
|
||||
key, value, self._crc)
|
||||
|
||||
|
||||
class LegacyRecord(ABCRecord):
|
||||
|
||||
__slots__ = ("_magic", "_offset", "_timestamp", "_timestamp_type", "_key", "_value",
|
||||
"_crc", "_crc_bytes")
|
||||
__slots__ = ("_offset", "_timestamp", "_timestamp_type", "_key", "_value",
|
||||
"_crc")
|
||||
|
||||
def __init__(self, magic, offset, timestamp, timestamp_type, key, value, crc, crc_bytes):
|
||||
self._magic = magic
|
||||
def __init__(self, offset, timestamp, timestamp_type, key, value, crc):
|
||||
self._offset = offset
|
||||
self._timestamp = timestamp
|
||||
self._timestamp_type = timestamp_type
|
||||
self._key = key
|
||||
self._value = value
|
||||
self._crc = crc
|
||||
self._crc_bytes = crc_bytes
|
||||
|
||||
@property
|
||||
def magic(self):
|
||||
return self._magic
|
||||
|
||||
@property
|
||||
def offset(self):
|
||||
@@ -354,19 +330,11 @@ class LegacyRecord(ABCRecord):
|
||||
def checksum(self):
|
||||
return self._crc
|
||||
|
||||
def validate_crc(self):
|
||||
crc = calc_crc32(self._crc_bytes)
|
||||
return self._crc == crc
|
||||
|
||||
@property
|
||||
def size_in_bytes(self):
|
||||
return LegacyRecordBatchBuilder.estimate_size_in_bytes(self._magic, None, self._key, self._value)
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
"LegacyRecord(magic={!r} offset={!r}, timestamp={!r}, timestamp_type={!r},"
|
||||
"LegacyRecord(offset={!r}, timestamp={!r}, timestamp_type={!r},"
|
||||
" key={!r}, value={!r}, crc={!r})".format(
|
||||
self._magic, self._offset, self._timestamp, self._timestamp_type,
|
||||
self._offset, self._timestamp, self._timestamp_type,
|
||||
self._key, self._value, self._crc)
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ from __future__ import division
|
||||
|
||||
import struct
|
||||
|
||||
from kafka.errors import CorruptRecordError, IllegalStateError, UnsupportedVersionError
|
||||
from kafka.errors import CorruptRecordException
|
||||
from kafka.record.abc import ABCRecords
|
||||
from kafka.record.legacy_records import LegacyRecordBatch, LegacyRecordBatchBuilder
|
||||
from kafka.record.default_records import DefaultRecordBatch, DefaultRecordBatchBuilder
|
||||
@@ -99,7 +99,7 @@ class MemoryRecords(ABCRecords):
|
||||
if next_slice is None:
|
||||
return None
|
||||
if len(next_slice) < _min_slice:
|
||||
raise CorruptRecordError(
|
||||
raise CorruptRecordException(
|
||||
"Record size is less than the minimum record overhead "
|
||||
"({})".format(_min_slice - self.LOG_OVERHEAD))
|
||||
self._cache_next()
|
||||
@@ -109,56 +109,31 @@ class MemoryRecords(ABCRecords):
|
||||
else:
|
||||
return DefaultRecordBatch(next_slice)
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
if not self.has_next():
|
||||
raise StopIteration
|
||||
return self.next_batch()
|
||||
|
||||
next = __next__
|
||||
|
||||
|
||||
class MemoryRecordsBuilder(object):
|
||||
|
||||
__slots__ = ("_builder", "_batch_size", "_buffer", "_next_offset", "_closed",
|
||||
"_magic", "_bytes_written", "_producer_id", "_producer_epoch")
|
||||
"_bytes_written")
|
||||
|
||||
def __init__(self, magic, compression_type, batch_size, offset=0,
|
||||
transactional=False, producer_id=-1, producer_epoch=-1, base_sequence=-1):
|
||||
def __init__(self, magic, compression_type, batch_size):
|
||||
assert magic in [0, 1, 2], "Not supported magic"
|
||||
assert compression_type in [0, 1, 2, 3, 4], "Not valid compression type"
|
||||
if magic >= 2:
|
||||
assert not transactional or producer_id != -1, "Cannot write transactional messages without a valid producer ID"
|
||||
assert producer_id == -1 or producer_epoch != -1, "Invalid negative producer epoch"
|
||||
assert producer_id == -1 or base_sequence != -1, "Invalid negative sequence number used"
|
||||
|
||||
self._builder = DefaultRecordBatchBuilder(
|
||||
magic=magic, compression_type=compression_type,
|
||||
is_transactional=transactional, producer_id=producer_id,
|
||||
producer_epoch=producer_epoch, base_sequence=base_sequence,
|
||||
batch_size=batch_size)
|
||||
self._producer_id = producer_id
|
||||
self._producer_epoch = producer_epoch
|
||||
is_transactional=False, producer_id=-1, producer_epoch=-1,
|
||||
base_sequence=-1, batch_size=batch_size)
|
||||
else:
|
||||
assert not transactional and producer_id == -1, "Idempotent messages are not supported for magic %s" % (magic,)
|
||||
self._builder = LegacyRecordBatchBuilder(
|
||||
magic=magic, compression_type=compression_type,
|
||||
batch_size=batch_size)
|
||||
self._producer_id = None
|
||||
self._batch_size = batch_size
|
||||
self._buffer = None
|
||||
|
||||
self._next_offset = offset
|
||||
self._next_offset = 0
|
||||
self._closed = False
|
||||
self._magic = magic
|
||||
self._bytes_written = 0
|
||||
|
||||
def skip(self, offsets_to_skip):
|
||||
# Exposed for testing compacted records
|
||||
self._next_offset += offsets_to_skip
|
||||
|
||||
def append(self, timestamp, key, value, headers=[]):
|
||||
""" Append a message to the buffer.
|
||||
|
||||
@@ -176,30 +151,6 @@ class MemoryRecordsBuilder(object):
|
||||
self._next_offset += 1
|
||||
return metadata
|
||||
|
||||
def set_producer_state(self, producer_id, producer_epoch, base_sequence, is_transactional):
|
||||
if self._magic < 2:
|
||||
raise UnsupportedVersionError('Producer State requires Message format v2+')
|
||||
elif self._closed:
|
||||
# Sequence numbers are assigned when the batch is closed while the accumulator is being drained.
|
||||
# If the resulting ProduceRequest to the partition leader failed for a retriable error, the batch will
|
||||
# be re queued. In this case, we should not attempt to set the state again, since changing the pid and sequence
|
||||
# once a batch has been sent to the broker risks introducing duplicates.
|
||||
raise IllegalStateError("Trying to set producer state of an already closed batch. This indicates a bug on the client.")
|
||||
self._builder.set_producer_state(producer_id, producer_epoch, base_sequence, is_transactional)
|
||||
self._producer_id = producer_id
|
||||
|
||||
@property
|
||||
def producer_id(self):
|
||||
return self._producer_id
|
||||
|
||||
@property
|
||||
def producer_epoch(self):
|
||||
return self._producer_epoch
|
||||
|
||||
def records(self):
|
||||
assert self._closed
|
||||
return MemoryRecords(self._buffer)
|
||||
|
||||
def close(self):
|
||||
# This method may be called multiple times on the same batch
|
||||
# i.e., on retries
|
||||
@@ -209,9 +160,6 @@ class MemoryRecordsBuilder(object):
|
||||
if not self._closed:
|
||||
self._bytes_written = self._builder.size()
|
||||
self._buffer = bytes(self._builder.build())
|
||||
if self._magic == 2:
|
||||
self._producer_id = self._builder.producer_id
|
||||
self._producer_epoch = self._builder.producer_epoch
|
||||
self._builder = None
|
||||
self._closed = True
|
||||
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import platform
|
||||
|
||||
from kafka.sasl.gssapi import SaslMechanismGSSAPI
|
||||
from kafka.sasl.msk import SaslMechanismAwsMskIam
|
||||
from kafka.sasl.oauth import SaslMechanismOAuth
|
||||
from kafka.sasl.plain import SaslMechanismPlain
|
||||
from kafka.sasl.scram import SaslMechanismScram
|
||||
from kafka.sasl.sspi import SaslMechanismSSPI
|
||||
|
||||
|
||||
SASL_MECHANISMS = {}
|
||||
|
||||
|
||||
def register_sasl_mechanism(name, klass, overwrite=False):
|
||||
if not overwrite and name in SASL_MECHANISMS:
|
||||
raise ValueError('Sasl mechanism %s already defined!' % name)
|
||||
SASL_MECHANISMS[name] = klass
|
||||
|
||||
|
||||
def get_sasl_mechanism(name):
|
||||
return SASL_MECHANISMS[name]
|
||||
|
||||
|
||||
register_sasl_mechanism('AWS_MSK_IAM', SaslMechanismAwsMskIam)
|
||||
if platform.system() == 'Windows':
|
||||
register_sasl_mechanism('GSSAPI', SaslMechanismSSPI)
|
||||
else:
|
||||
register_sasl_mechanism('GSSAPI', SaslMechanismGSSAPI)
|
||||
register_sasl_mechanism('OAUTHBEARER', SaslMechanismOAuth)
|
||||
register_sasl_mechanism('PLAIN', SaslMechanismPlain)
|
||||
register_sasl_mechanism('SCRAM-SHA-256', SaslMechanismScram)
|
||||
register_sasl_mechanism('SCRAM-SHA-512', SaslMechanismScram)
|
||||
@@ -1,33 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
|
||||
from kafka.vendor.six import add_metaclass
|
||||
|
||||
|
||||
@add_metaclass(abc.ABCMeta)
|
||||
class SaslMechanism(object):
|
||||
@abc.abstractmethod
|
||||
def __init__(self, **config):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def auth_bytes(self):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def receive(self, auth_bytes):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_done(self):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_authenticated(self):
|
||||
pass
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated via SASL'
|
||||
@@ -1,96 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import struct
|
||||
|
||||
# needed for SASL_GSSAPI authentication:
|
||||
try:
|
||||
import gssapi
|
||||
from gssapi.raw.misc import GSSError
|
||||
except (ImportError, OSError):
|
||||
#no gssapi available, will disable gssapi mechanism
|
||||
gssapi = None
|
||||
GSSError = None
|
||||
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
|
||||
|
||||
class SaslMechanismGSSAPI(SaslMechanism):
|
||||
# Establish security context and negotiate protection level
|
||||
# For reference RFC 2222, section 7.2.1
|
||||
|
||||
SASL_QOP_AUTH = 1
|
||||
SASL_QOP_AUTH_INT = 2
|
||||
SASL_QOP_AUTH_CONF = 4
|
||||
|
||||
def __init__(self, **config):
|
||||
assert gssapi is not None, 'GSSAPI lib not available'
|
||||
if 'sasl_kerberos_name' not in config and 'sasl_kerberos_service_name' not in config:
|
||||
raise ValueError('sasl_kerberos_service_name or sasl_kerberos_name required for GSSAPI sasl configuration')
|
||||
self._is_done = False
|
||||
self._is_authenticated = False
|
||||
self.gssapi_name = None
|
||||
if config.get('sasl_kerberos_name', None) is not None:
|
||||
self.auth_id = str(config['sasl_kerberos_name'])
|
||||
if isinstance(config['sasl_kerberos_name'], gssapi.Name):
|
||||
self.gssapi_name = config['sasl_kerberos_name']
|
||||
else:
|
||||
kerberos_domain_name = config.get('sasl_kerberos_domain_name', '') or config.get('host', '')
|
||||
self.auth_id = config['sasl_kerberos_service_name'] + '@' + kerberos_domain_name
|
||||
if self.gssapi_name is None:
|
||||
self.gssapi_name = gssapi.Name(self.auth_id, name_type=gssapi.NameType.hostbased_service).canonicalize(gssapi.MechType.kerberos)
|
||||
self._client_ctx = gssapi.SecurityContext(name=self.gssapi_name, usage='initiate')
|
||||
self._next_token = self._client_ctx.step(None)
|
||||
|
||||
def auth_bytes(self):
|
||||
# GSSAPI Auth does not have a final broker->client message
|
||||
# so mark is_done after the final auth_bytes are provided
|
||||
# in practice we'll still receive a response when using SaslAuthenticate
|
||||
# but not when using the prior unframed approach.
|
||||
if self._is_authenticated:
|
||||
self._is_done = True
|
||||
return self._next_token or b''
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
if not self._client_ctx.complete:
|
||||
# The server will send a token back. Processing of this token either
|
||||
# establishes a security context, or it needs further token exchange.
|
||||
# The gssapi will be able to identify the needed next step.
|
||||
self._next_token = self._client_ctx.step(auth_bytes)
|
||||
elif self._is_done:
|
||||
# The final step of gssapi is send, so we do not expect any additional bytes
|
||||
# however, allow an empty message to support SaslAuthenticate response
|
||||
if auth_bytes != b'':
|
||||
raise ValueError("Unexpected receive auth_bytes after sasl/gssapi completion")
|
||||
else:
|
||||
# unwraps message containing supported protection levels and msg size
|
||||
msg = self._client_ctx.unwrap(auth_bytes).message
|
||||
# Kafka currently doesn't support integrity or confidentiality security layers, so we
|
||||
# simply set QoP to 'auth' only (first octet). We reuse the max message size proposed
|
||||
# by the server
|
||||
client_flags = self.SASL_QOP_AUTH
|
||||
server_flags = struct.Struct('>b').unpack(msg[0:1])[0]
|
||||
message_parts = [
|
||||
struct.Struct('>b').pack(client_flags & server_flags),
|
||||
msg[1:], # always agree to max message size from server
|
||||
self.auth_id.encode('utf-8'),
|
||||
]
|
||||
# add authorization identity to the response, and GSS-wrap
|
||||
self._next_token = self._client_ctx.wrap(b''.join(message_parts), False).message
|
||||
# We need to identify the last token in auth_bytes();
|
||||
# we can't rely on client_ctx.complete because it becomes True after generating
|
||||
# the second-to-last token (after calling .step(auth_bytes) for the final time)
|
||||
# We could introduce an additional state variable (i.e., self._final_token),
|
||||
# but instead we just set _is_authenticated. Since the plugin interface does
|
||||
# not read is_authenticated() until after is_done() is True, this should be fine.
|
||||
self._is_authenticated = True
|
||||
|
||||
def is_done(self):
|
||||
return self._is_done
|
||||
|
||||
def is_authenticated(self):
|
||||
return self._is_authenticated
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated as %s to %s via SASL / GSSAPI' % (self._client_ctx.initiator_name, self._client_ctx.target_name)
|
||||
@@ -1,244 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import string
|
||||
|
||||
# needed for AWS_MSK_IAM authentication:
|
||||
try:
|
||||
from botocore.session import Session as BotoSession
|
||||
except ImportError:
|
||||
# no botocore available, will disable AWS_MSK_IAM mechanism
|
||||
BotoSession = None
|
||||
|
||||
from kafka.errors import KafkaConfigurationError
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
from kafka.vendor.six.moves import urllib
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SaslMechanismAwsMskIam(SaslMechanism):
|
||||
def __init__(self, **config):
|
||||
assert BotoSession is not None, 'AWS_MSK_IAM requires the "botocore" package'
|
||||
assert config.get('security_protocol', '') == 'SASL_SSL', 'AWS_MSK_IAM requires SASL_SSL'
|
||||
assert 'host' in config, 'AWS_MSK_IAM requires host configuration'
|
||||
self.host = config['host']
|
||||
self._auth = None
|
||||
self._is_done = False
|
||||
self._is_authenticated = False
|
||||
|
||||
def _build_client(self):
|
||||
session = BotoSession()
|
||||
credentials = session.get_credentials().get_frozen_credentials()
|
||||
if not session.get_config_variable('region'):
|
||||
raise KafkaConfigurationError('Unable to determine region for AWS MSK cluster. Is AWS_DEFAULT_REGION set?')
|
||||
return AwsMskIamClient(
|
||||
host=self.host,
|
||||
access_key=credentials.access_key,
|
||||
secret_key=credentials.secret_key,
|
||||
region=session.get_config_variable('region'),
|
||||
token=credentials.token,
|
||||
)
|
||||
|
||||
def auth_bytes(self):
|
||||
client = self._build_client()
|
||||
log.debug("Generating auth token for MSK scope: %s", client._scope)
|
||||
return client.first_message()
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
self._is_done = True
|
||||
self._is_authenticated = auth_bytes != b''
|
||||
self._auth = auth_bytes.decode('utf-8')
|
||||
|
||||
def is_done(self):
|
||||
return self._is_done
|
||||
|
||||
def is_authenticated(self):
|
||||
return self._is_authenticated
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated via SASL / AWS_MSK_IAM %s' % (self._auth,)
|
||||
|
||||
|
||||
class AwsMskIamClient:
|
||||
UNRESERVED_CHARS = string.ascii_letters + string.digits + '-._~'
|
||||
|
||||
def __init__(self, host, access_key, secret_key, region, token=None):
|
||||
"""
|
||||
Arguments:
|
||||
host (str): The hostname of the broker.
|
||||
access_key (str): An AWS_ACCESS_KEY_ID.
|
||||
secret_key (str): An AWS_SECRET_ACCESS_KEY.
|
||||
region (str): An AWS_REGION.
|
||||
token (Optional[str]): An AWS_SESSION_TOKEN if using temporary
|
||||
credentials.
|
||||
"""
|
||||
self.algorithm = 'AWS4-HMAC-SHA256'
|
||||
self.expires = '900'
|
||||
self.hashfunc = hashlib.sha256
|
||||
self.headers = [
|
||||
('host', host)
|
||||
]
|
||||
self.version = '2020_10_22'
|
||||
|
||||
self.service = 'kafka-cluster'
|
||||
self.action = '{}:Connect'.format(self.service)
|
||||
|
||||
now = datetime.datetime.utcnow()
|
||||
self.datestamp = now.strftime('%Y%m%d')
|
||||
self.timestamp = now.strftime('%Y%m%dT%H%M%SZ')
|
||||
|
||||
self.host = host
|
||||
self.access_key = access_key
|
||||
self.secret_key = secret_key
|
||||
self.region = region
|
||||
self.token = token
|
||||
|
||||
@property
|
||||
def _credential(self):
|
||||
return '{0.access_key}/{0._scope}'.format(self)
|
||||
|
||||
@property
|
||||
def _scope(self):
|
||||
return '{0.datestamp}/{0.region}/{0.service}/aws4_request'.format(self)
|
||||
|
||||
@property
|
||||
def _signed_headers(self):
|
||||
"""
|
||||
Returns (str):
|
||||
An alphabetically sorted, semicolon-delimited list of lowercase
|
||||
request header names.
|
||||
"""
|
||||
return ';'.join(sorted(k.lower() for k, _ in self.headers))
|
||||
|
||||
@property
|
||||
def _canonical_headers(self):
|
||||
"""
|
||||
Returns (str):
|
||||
A newline-delimited list of header names and values.
|
||||
Header names are lowercased.
|
||||
"""
|
||||
return '\n'.join(map(':'.join, self.headers)) + '\n'
|
||||
|
||||
@property
|
||||
def _canonical_request(self):
|
||||
"""
|
||||
Returns (str):
|
||||
An AWS Signature Version 4 canonical request in the format:
|
||||
<Method>\n
|
||||
<Path>\n
|
||||
<CanonicalQueryString>\n
|
||||
<CanonicalHeaders>\n
|
||||
<SignedHeaders>\n
|
||||
<HashedPayload>
|
||||
"""
|
||||
# The hashed_payload is always an empty string for MSK.
|
||||
hashed_payload = self.hashfunc(b'').hexdigest()
|
||||
return '\n'.join((
|
||||
'GET',
|
||||
'/',
|
||||
self._canonical_querystring,
|
||||
self._canonical_headers,
|
||||
self._signed_headers,
|
||||
hashed_payload,
|
||||
))
|
||||
|
||||
@property
|
||||
def _canonical_querystring(self):
|
||||
"""
|
||||
Returns (str):
|
||||
A '&'-separated list of URI-encoded key/value pairs.
|
||||
"""
|
||||
params = []
|
||||
params.append(('Action', self.action))
|
||||
params.append(('X-Amz-Algorithm', self.algorithm))
|
||||
params.append(('X-Amz-Credential', self._credential))
|
||||
params.append(('X-Amz-Date', self.timestamp))
|
||||
params.append(('X-Amz-Expires', self.expires))
|
||||
if self.token:
|
||||
params.append(('X-Amz-Security-Token', self.token))
|
||||
params.append(('X-Amz-SignedHeaders', self._signed_headers))
|
||||
|
||||
return '&'.join(self._uriencode(k) + '=' + self._uriencode(v) for k, v in params)
|
||||
|
||||
@property
|
||||
def _signing_key(self):
|
||||
"""
|
||||
Returns (bytes):
|
||||
An AWS Signature V4 signing key generated from the secret_key, date,
|
||||
region, service, and request type.
|
||||
"""
|
||||
key = self._hmac(('AWS4' + self.secret_key).encode('utf-8'), self.datestamp)
|
||||
key = self._hmac(key, self.region)
|
||||
key = self._hmac(key, self.service)
|
||||
key = self._hmac(key, 'aws4_request')
|
||||
return key
|
||||
|
||||
@property
|
||||
def _signing_str(self):
|
||||
"""
|
||||
Returns (str):
|
||||
A string used to sign the AWS Signature V4 payload in the format:
|
||||
<Algorithm>\n
|
||||
<Timestamp>\n
|
||||
<Scope>\n
|
||||
<CanonicalRequestHash>
|
||||
"""
|
||||
canonical_request_hash = self.hashfunc(self._canonical_request.encode('utf-8')).hexdigest()
|
||||
return '\n'.join((self.algorithm, self.timestamp, self._scope, canonical_request_hash))
|
||||
|
||||
def _uriencode(self, msg):
|
||||
"""
|
||||
Arguments:
|
||||
msg (str): A string to URI-encode.
|
||||
|
||||
Returns (str):
|
||||
The URI-encoded version of the provided msg, following the encoding
|
||||
rules specified: https://github.com/aws/aws-msk-iam-auth#uriencode
|
||||
"""
|
||||
return urllib.parse.quote(msg, safe=self.UNRESERVED_CHARS)
|
||||
|
||||
def _hmac(self, key, msg):
|
||||
"""
|
||||
Arguments:
|
||||
key (bytes): A key to use for the HMAC digest.
|
||||
msg (str): A value to include in the HMAC digest.
|
||||
Returns (bytes):
|
||||
An HMAC digest of the given key and msg.
|
||||
"""
|
||||
return hmac.new(key, msg.encode('utf-8'), digestmod=self.hashfunc).digest()
|
||||
|
||||
def first_message(self):
|
||||
"""
|
||||
Returns (bytes):
|
||||
An encoded JSON authentication payload that can be sent to the
|
||||
broker.
|
||||
"""
|
||||
signature = hmac.new(
|
||||
self._signing_key,
|
||||
self._signing_str.encode('utf-8'),
|
||||
digestmod=self.hashfunc,
|
||||
).hexdigest()
|
||||
msg = {
|
||||
'version': self.version,
|
||||
'host': self.host,
|
||||
'user-agent': 'kafka-python',
|
||||
'action': self.action,
|
||||
'x-amz-algorithm': self.algorithm,
|
||||
'x-amz-credential': self._credential,
|
||||
'x-amz-date': self.timestamp,
|
||||
'x-amz-signedheaders': self._signed_headers,
|
||||
'x-amz-expires': self.expires,
|
||||
'x-amz-signature': signature,
|
||||
}
|
||||
if self.token:
|
||||
msg['x-amz-security-token'] = self.token
|
||||
|
||||
return json.dumps(msg, separators=(',', ':')).encode('utf-8')
|
||||
@@ -1,100 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import abc
|
||||
import logging
|
||||
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SaslMechanismOAuth(SaslMechanism):
|
||||
|
||||
def __init__(self, **config):
|
||||
assert 'sasl_oauth_token_provider' in config, 'sasl_oauth_token_provider required for OAUTHBEARER sasl'
|
||||
assert isinstance(config['sasl_oauth_token_provider'], AbstractTokenProvider), \
|
||||
'sasl_oauth_token_provider must implement kafka.sasl.oauth.AbstractTokenProvider'
|
||||
self.token_provider = config['sasl_oauth_token_provider']
|
||||
self._error = None
|
||||
self._is_done = False
|
||||
self._is_authenticated = False
|
||||
|
||||
def auth_bytes(self):
|
||||
if self._error:
|
||||
# Server should respond to this with SaslAuthenticate failure, which ends the auth process
|
||||
return self._error
|
||||
token = self.token_provider.token()
|
||||
extensions = self._token_extensions()
|
||||
return "n,,\x01auth=Bearer {}{}\x01\x01".format(token, extensions).encode('utf-8')
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
if auth_bytes != b'':
|
||||
error = auth_bytes.decode('utf-8')
|
||||
log.debug("Sending x01 response to server after receiving SASL OAuth error: %s", error)
|
||||
self._error = b'\x01'
|
||||
else:
|
||||
self._is_done = True
|
||||
self._is_authenticated = True
|
||||
|
||||
def is_done(self):
|
||||
return self._is_done
|
||||
|
||||
def is_authenticated(self):
|
||||
return self._is_authenticated
|
||||
|
||||
def _token_extensions(self):
|
||||
"""
|
||||
Return a string representation of the OPTIONAL key-value pairs that can be sent with an OAUTHBEARER
|
||||
initial request.
|
||||
"""
|
||||
# Builds up a string separated by \x01 via a dict of key value pairs
|
||||
extensions = self.token_provider.extensions()
|
||||
msg = '\x01'.join(['{}={}'.format(k, v) for k, v in extensions.items()])
|
||||
return '\x01' + msg if msg else ''
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated via SASL / OAuth'
|
||||
|
||||
# This statement is compatible with both Python 2.7 & 3+
|
||||
ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()})
|
||||
|
||||
class AbstractTokenProvider(ABC):
|
||||
"""
|
||||
A Token Provider must be used for the SASL OAuthBearer protocol.
|
||||
|
||||
The implementation should ensure token reuse so that multiple
|
||||
calls at connect time do not create multiple tokens. The implementation
|
||||
should also periodically refresh the token in order to guarantee
|
||||
that each call returns an unexpired token. A timeout error should
|
||||
be returned after a short period of inactivity so that the
|
||||
broker can log debugging info and retry.
|
||||
|
||||
Token Providers MUST implement the token() method
|
||||
"""
|
||||
|
||||
def __init__(self, **config):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def token(self):
|
||||
"""
|
||||
Returns a (str) ID/Access Token to be sent to the Kafka
|
||||
client.
|
||||
"""
|
||||
pass
|
||||
|
||||
def extensions(self):
|
||||
"""
|
||||
This is an OPTIONAL method that may be implemented.
|
||||
|
||||
Returns a map of key-value pairs that can
|
||||
be sent with the SASL/OAUTHBEARER initial client request. If
|
||||
not implemented, the values are ignored. This feature is only available
|
||||
in Kafka >= 2.1.0.
|
||||
|
||||
All returned keys and values should be type str
|
||||
"""
|
||||
return {}
|
||||
@@ -1,41 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import logging
|
||||
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SaslMechanismPlain(SaslMechanism):
|
||||
|
||||
def __init__(self, **config):
|
||||
if config.get('security_protocol', '') == 'SASL_PLAINTEXT':
|
||||
log.warning('Sending username and password in the clear')
|
||||
assert 'sasl_plain_username' in config, 'sasl_plain_username required for PLAIN sasl'
|
||||
assert 'sasl_plain_password' in config, 'sasl_plain_password required for PLAIN sasl'
|
||||
|
||||
self.username = config['sasl_plain_username']
|
||||
self.password = config['sasl_plain_password']
|
||||
self._is_done = False
|
||||
self._is_authenticated = False
|
||||
|
||||
def auth_bytes(self):
|
||||
# Send PLAIN credentials per RFC-4616
|
||||
return bytes('\0'.join([self.username, self.username, self.password]).encode('utf-8'))
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
self._is_done = True
|
||||
self._is_authenticated = auth_bytes == b''
|
||||
|
||||
def is_done(self):
|
||||
return self._is_done
|
||||
|
||||
def is_authenticated(self):
|
||||
return self._is_authenticated
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated as %s via SASL / Plain' % self.username
|
||||
@@ -1,133 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
from kafka.vendor import six
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
if six.PY2:
|
||||
def xor_bytes(left, right):
|
||||
return bytearray(ord(lb) ^ ord(rb) for lb, rb in zip(left, right))
|
||||
else:
|
||||
def xor_bytes(left, right):
|
||||
return bytes(lb ^ rb for lb, rb in zip(left, right))
|
||||
|
||||
|
||||
class SaslMechanismScram(SaslMechanism):
|
||||
def __init__(self, **config):
|
||||
assert 'sasl_plain_username' in config, 'sasl_plain_username required for SCRAM sasl'
|
||||
assert 'sasl_plain_password' in config, 'sasl_plain_password required for SCRAM sasl'
|
||||
assert config.get('sasl_mechanism', '') in ScramClient.MECHANISMS, 'Unrecognized SCRAM mechanism'
|
||||
if config.get('security_protocol', '') == 'SASL_PLAINTEXT':
|
||||
log.warning('Exchanging credentials in the clear during Sasl Authentication')
|
||||
|
||||
self.username = config['sasl_plain_username']
|
||||
self.mechanism = config['sasl_mechanism']
|
||||
self._scram_client = ScramClient(
|
||||
config['sasl_plain_username'],
|
||||
config['sasl_plain_password'],
|
||||
config['sasl_mechanism']
|
||||
)
|
||||
self._state = 0
|
||||
|
||||
def auth_bytes(self):
|
||||
if self._state == 0:
|
||||
return self._scram_client.first_message()
|
||||
elif self._state == 1:
|
||||
return self._scram_client.final_message()
|
||||
else:
|
||||
raise ValueError('No auth_bytes for state: %s' % self._state)
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
if self._state == 0:
|
||||
self._scram_client.process_server_first_message(auth_bytes)
|
||||
elif self._state == 1:
|
||||
self._scram_client.process_server_final_message(auth_bytes)
|
||||
else:
|
||||
raise ValueError('Cannot receive bytes in state: %s' % self._state)
|
||||
self._state += 1
|
||||
return self.is_done()
|
||||
|
||||
def is_done(self):
|
||||
return self._state == 2
|
||||
|
||||
def is_authenticated(self):
|
||||
# receive raises if authentication fails...?
|
||||
return self._state == 2
|
||||
|
||||
def auth_details(self):
|
||||
if not self.is_authenticated:
|
||||
raise RuntimeError('Not authenticated yet!')
|
||||
return 'Authenticated as %s via SASL / %s' % (self.username, self.mechanism)
|
||||
|
||||
|
||||
class ScramClient:
|
||||
MECHANISMS = {
|
||||
'SCRAM-SHA-256': hashlib.sha256,
|
||||
'SCRAM-SHA-512': hashlib.sha512
|
||||
}
|
||||
|
||||
def __init__(self, user, password, mechanism):
|
||||
self.nonce = str(uuid.uuid4()).replace('-', '').encode('utf-8')
|
||||
self.auth_message = b''
|
||||
self.salted_password = None
|
||||
self.user = user.encode('utf-8')
|
||||
self.password = password.encode('utf-8')
|
||||
self.hashfunc = self.MECHANISMS[mechanism]
|
||||
self.hashname = ''.join(mechanism.lower().split('-')[1:3])
|
||||
self.stored_key = None
|
||||
self.client_key = None
|
||||
self.client_signature = None
|
||||
self.client_proof = None
|
||||
self.server_key = None
|
||||
self.server_signature = None
|
||||
|
||||
def first_message(self):
|
||||
client_first_bare = b'n=' + self.user + b',r=' + self.nonce
|
||||
self.auth_message += client_first_bare
|
||||
return b'n,,' + client_first_bare
|
||||
|
||||
def process_server_first_message(self, server_first_message):
|
||||
self.auth_message += b',' + server_first_message
|
||||
params = dict(pair.split('=', 1) for pair in server_first_message.decode('utf-8').split(','))
|
||||
server_nonce = params['r'].encode('utf-8')
|
||||
if not server_nonce.startswith(self.nonce):
|
||||
raise ValueError("Server nonce, did not start with client nonce!")
|
||||
self.nonce = server_nonce
|
||||
self.auth_message += b',c=biws,r=' + self.nonce
|
||||
|
||||
salt = base64.b64decode(params['s'].encode('utf-8'))
|
||||
iterations = int(params['i'])
|
||||
self.create_salted_password(salt, iterations)
|
||||
|
||||
self.client_key = self.hmac(self.salted_password, b'Client Key')
|
||||
self.stored_key = self.hashfunc(self.client_key).digest()
|
||||
self.client_signature = self.hmac(self.stored_key, self.auth_message)
|
||||
self.client_proof = xor_bytes(self.client_key, self.client_signature)
|
||||
self.server_key = self.hmac(self.salted_password, b'Server Key')
|
||||
self.server_signature = self.hmac(self.server_key, self.auth_message)
|
||||
|
||||
def hmac(self, key, msg):
|
||||
return hmac.new(key, msg, digestmod=self.hashfunc).digest()
|
||||
|
||||
def create_salted_password(self, salt, iterations):
|
||||
self.salted_password = hashlib.pbkdf2_hmac(
|
||||
self.hashname, self.password, salt, iterations
|
||||
)
|
||||
|
||||
def final_message(self):
|
||||
return b'c=biws,r=' + self.nonce + b',p=' + base64.b64encode(self.client_proof)
|
||||
|
||||
def process_server_final_message(self, server_final_message):
|
||||
params = dict(pair.split('=', 1) for pair in server_final_message.decode('utf-8').split(','))
|
||||
if self.server_signature != base64.b64decode(params['v'].encode('utf-8')):
|
||||
raise ValueError("Server sent wrong signature!")
|
||||
@@ -1,111 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import logging
|
||||
|
||||
# Windows-only
|
||||
try:
|
||||
import sspi
|
||||
import pywintypes
|
||||
import sspicon
|
||||
import win32security
|
||||
except ImportError:
|
||||
sspi = None
|
||||
|
||||
from kafka.sasl.abc import SaslMechanism
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SaslMechanismSSPI(SaslMechanism):
|
||||
# Establish security context and negotiate protection level
|
||||
# For reference see RFC 4752, section 3
|
||||
|
||||
SASL_QOP_AUTH = 1
|
||||
SASL_QOP_AUTH_INT = 2
|
||||
SASL_QOP_AUTH_CONF = 4
|
||||
|
||||
def __init__(self, **config):
|
||||
assert sspi is not None, 'No GSSAPI lib available (gssapi or sspi)'
|
||||
if 'sasl_kerberos_name' not in config and 'sasl_kerberos_service_name' not in config:
|
||||
raise ValueError('sasl_kerberos_service_name or sasl_kerberos_name required for GSSAPI sasl configuration')
|
||||
self._is_done = False
|
||||
self._is_authenticated = False
|
||||
if config.get('sasl_kerberos_name', None) is not None:
|
||||
self.auth_id = str(config['sasl_kerberos_name'])
|
||||
else:
|
||||
kerberos_domain_name = config.get('sasl_kerberos_domain_name', '') or config.get('host', '')
|
||||
self.auth_id = config['sasl_kerberos_service_name'] + '/' + kerberos_domain_name
|
||||
scheme = "Kerberos" # Do not try with Negotiate for SASL authentication. Tokens are different.
|
||||
# https://docs.microsoft.com/en-us/windows/win32/secauthn/context-requirements
|
||||
flags = (
|
||||
sspicon.ISC_REQ_MUTUAL_AUTH | # mutual authentication
|
||||
sspicon.ISC_REQ_INTEGRITY | # check for integrity
|
||||
sspicon.ISC_REQ_SEQUENCE_DETECT | # enable out-of-order messages
|
||||
sspicon.ISC_REQ_CONFIDENTIALITY # request confidentiality
|
||||
)
|
||||
self._client_ctx = sspi.ClientAuth(scheme, targetspn=self.auth_id, scflags=flags)
|
||||
self._next_token = self._client_ctx.step(None)
|
||||
|
||||
def auth_bytes(self):
|
||||
# GSSAPI Auth does not have a final broker->client message
|
||||
# so mark is_done after the final auth_bytes are provided
|
||||
# in practice we'll still receive a response when using SaslAuthenticate
|
||||
# but not when using the prior unframed approach.
|
||||
if self._client_ctx.authenticated:
|
||||
self._is_done = True
|
||||
self._is_authenticated = True
|
||||
return self._next_token or b''
|
||||
|
||||
def receive(self, auth_bytes):
|
||||
log.debug("Received token from server (size %s)", len(auth_bytes))
|
||||
if not self._client_ctx.authenticated:
|
||||
# calculate an output token from kafka token (or None on first iteration)
|
||||
# https://docs.microsoft.com/en-us/windows/win32/api/sspi/nf-sspi-initializesecuritycontexta
|
||||
# https://docs.microsoft.com/en-us/windows/win32/secauthn/initializesecuritycontext--kerberos
|
||||
# authorize method will wrap for us our token in sspi structures
|
||||
error, auth = self._client_ctx.authorize(auth_bytes)
|
||||
if len(auth) > 0 and len(auth[0].Buffer):
|
||||
log.debug("Got token from context")
|
||||
# this buffer must be sent to the server whatever the result is
|
||||
self._next_token = auth[0].Buffer
|
||||
else:
|
||||
log.debug("Got no token, exchange finished")
|
||||
# seems to be the end of the loop
|
||||
self._next_token = b''
|
||||
elif self._is_done:
|
||||
# The final step of gssapi is send, so we do not expect any additional bytes
|
||||
# however, allow an empty message to support SaslAuthenticate response
|
||||
if auth_bytes != b'':
|
||||
raise ValueError("Unexpected receive auth_bytes after sasl/gssapi completion")
|
||||
else:
|
||||
# Process the security layer negotiation token, sent by the server
|
||||
# once the security context is established.
|
||||
|
||||
# The following part is required by SASL, but not by classic Kerberos.
|
||||
# See RFC 4752
|
||||
|
||||
# unwraps message containing supported protection levels and msg size
|
||||
msg, _was_encrypted = self._client_ctx.unwrap(auth_bytes)
|
||||
|
||||
# Kafka currently doesn't support integrity or confidentiality security layers, so we
|
||||
# simply set QoP to 'auth' only (first octet). We reuse the max message size proposed
|
||||
# by the server
|
||||
client_flags = self.SASL_QOP_AUTH
|
||||
server_flags = msg[0]
|
||||
message_parts = [
|
||||
bytes(client_flags & server_flags),
|
||||
msg[:1],
|
||||
self.auth_id.encode('utf-8'),
|
||||
]
|
||||
# add authorization identity to the response, and GSS-wrap
|
||||
self._next_token = self._client_ctx.wrap(b''.join(message_parts), False)
|
||||
|
||||
def is_done(self):
|
||||
return self._is_done
|
||||
|
||||
def is_authenticated(self):
|
||||
return self._is_authenticated
|
||||
|
||||
def auth_details(self):
|
||||
return 'Authenticated as %s to %s via SASL / SSPI/GSSAPI \\o/' % (self._client_ctx.initiator_name, self._client_ctx.service_name)
|
||||
81
venv/lib/python3.12/site-packages/kafka/scram.py
Normal file
81
venv/lib/python3.12/site-packages/kafka/scram.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import uuid
|
||||
|
||||
from kafka.vendor import six
|
||||
|
||||
|
||||
if six.PY2:
|
||||
def xor_bytes(left, right):
|
||||
return bytearray(ord(lb) ^ ord(rb) for lb, rb in zip(left, right))
|
||||
else:
|
||||
def xor_bytes(left, right):
|
||||
return bytes(lb ^ rb for lb, rb in zip(left, right))
|
||||
|
||||
|
||||
class ScramClient:
|
||||
MECHANISMS = {
|
||||
'SCRAM-SHA-256': hashlib.sha256,
|
||||
'SCRAM-SHA-512': hashlib.sha512
|
||||
}
|
||||
|
||||
def __init__(self, user, password, mechanism):
|
||||
self.nonce = str(uuid.uuid4()).replace('-', '')
|
||||
self.auth_message = ''
|
||||
self.salted_password = None
|
||||
self.user = user
|
||||
self.password = password.encode('utf-8')
|
||||
self.hashfunc = self.MECHANISMS[mechanism]
|
||||
self.hashname = ''.join(mechanism.lower().split('-')[1:3])
|
||||
self.stored_key = None
|
||||
self.client_key = None
|
||||
self.client_signature = None
|
||||
self.client_proof = None
|
||||
self.server_key = None
|
||||
self.server_signature = None
|
||||
|
||||
def first_message(self):
|
||||
client_first_bare = 'n={},r={}'.format(self.user, self.nonce)
|
||||
self.auth_message += client_first_bare
|
||||
return 'n,,' + client_first_bare
|
||||
|
||||
def process_server_first_message(self, server_first_message):
|
||||
self.auth_message += ',' + server_first_message
|
||||
params = dict(pair.split('=', 1) for pair in server_first_message.split(','))
|
||||
server_nonce = params['r']
|
||||
if not server_nonce.startswith(self.nonce):
|
||||
raise ValueError("Server nonce, did not start with client nonce!")
|
||||
self.nonce = server_nonce
|
||||
self.auth_message += ',c=biws,r=' + self.nonce
|
||||
|
||||
salt = base64.b64decode(params['s'].encode('utf-8'))
|
||||
iterations = int(params['i'])
|
||||
self.create_salted_password(salt, iterations)
|
||||
|
||||
self.client_key = self.hmac(self.salted_password, b'Client Key')
|
||||
self.stored_key = self.hashfunc(self.client_key).digest()
|
||||
self.client_signature = self.hmac(self.stored_key, self.auth_message.encode('utf-8'))
|
||||
self.client_proof = xor_bytes(self.client_key, self.client_signature)
|
||||
self.server_key = self.hmac(self.salted_password, b'Server Key')
|
||||
self.server_signature = self.hmac(self.server_key, self.auth_message.encode('utf-8'))
|
||||
|
||||
def hmac(self, key, msg):
|
||||
return hmac.new(key, msg, digestmod=self.hashfunc).digest()
|
||||
|
||||
def create_salted_password(self, salt, iterations):
|
||||
self.salted_password = hashlib.pbkdf2_hmac(
|
||||
self.hashname, self.password, salt, iterations
|
||||
)
|
||||
|
||||
def final_message(self):
|
||||
return 'c=biws,r={},p={}'.format(self.nonce, base64.b64encode(self.client_proof).decode('utf-8'))
|
||||
|
||||
def process_server_final_message(self, server_final_message):
|
||||
params = dict(pair.split('=', 1) for pair in server_final_message.split(','))
|
||||
if self.server_signature != base64.b64decode(params['v'].encode('utf-8')):
|
||||
raise ValueError("Server sent wrong signature!")
|
||||
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
except ImportError:
|
||||
from urlparse import urlparse
|
||||
|
||||
import errno
|
||||
import logging
|
||||
import random
|
||||
import socket
|
||||
import struct
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProxyConnectionStates:
|
||||
DISCONNECTED = '<disconnected>'
|
||||
CONNECTING = '<connecting>'
|
||||
NEGOTIATE_PROPOSE = '<negotiate_propose>'
|
||||
NEGOTIATING = '<negotiating>'
|
||||
AUTHENTICATING = '<authenticating>'
|
||||
REQUEST_SUBMIT = '<request_submit>'
|
||||
REQUESTING = '<requesting>'
|
||||
READ_ADDRESS = '<read_address>'
|
||||
COMPLETE = '<complete>'
|
||||
|
||||
|
||||
class Socks5Wrapper:
|
||||
"""Socks5 proxy wrapper
|
||||
|
||||
Manages connection through socks5 proxy with support for username/password
|
||||
authentication.
|
||||
"""
|
||||
|
||||
def __init__(self, proxy_url, afi):
|
||||
self._buffer_in = b''
|
||||
self._buffer_out = b''
|
||||
self._proxy_url = urlparse(proxy_url)
|
||||
self._sock = None
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._target_afi = socket.AF_UNSPEC
|
||||
|
||||
proxy_addrs = self.dns_lookup(self._proxy_url.hostname, self._proxy_url.port, afi)
|
||||
# TODO raise error on lookup failure
|
||||
self._proxy_addr = random.choice(proxy_addrs)
|
||||
|
||||
@classmethod
|
||||
def is_inet_4_or_6(cls, gai):
|
||||
"""Given a getaddrinfo struct, return True iff ipv4 or ipv6"""
|
||||
return gai[0] in (socket.AF_INET, socket.AF_INET6)
|
||||
|
||||
@classmethod
|
||||
def dns_lookup(cls, host, port, afi=socket.AF_UNSPEC):
|
||||
"""Returns a list of getaddrinfo structs, optionally filtered to an afi (ipv4 / ipv6)"""
|
||||
# XXX: all DNS functions in Python are blocking. If we really
|
||||
# want to be non-blocking here, we need to use a 3rd-party
|
||||
# library like python-adns, or move resolution onto its
|
||||
# own thread. This will be subject to the default libc
|
||||
# name resolution timeout (5s on most Linux boxes)
|
||||
try:
|
||||
return list(filter(cls.is_inet_4_or_6,
|
||||
socket.getaddrinfo(host, port, afi,
|
||||
socket.SOCK_STREAM)))
|
||||
except socket.gaierror as ex:
|
||||
log.warning("DNS lookup failed for proxy %s:%d, %r", host, port, ex)
|
||||
return []
|
||||
|
||||
def socket(self, family, sock_type):
|
||||
"""Open and record a socket.
|
||||
|
||||
Returns the actual underlying socket
|
||||
object to ensure e.g. selects and ssl wrapping works as expected.
|
||||
"""
|
||||
self._target_afi = family # Store the address family of the target
|
||||
afi, _, _, _, _ = self._proxy_addr
|
||||
self._sock = socket.socket(afi, sock_type)
|
||||
return self._sock
|
||||
|
||||
def _flush_buf(self):
|
||||
"""Send out all data that is stored in the outgoing buffer.
|
||||
|
||||
It is expected that the caller handles error handling, including non-blocking
|
||||
as well as connection failure exceptions.
|
||||
"""
|
||||
while self._buffer_out:
|
||||
sent_bytes = self._sock.send(self._buffer_out)
|
||||
self._buffer_out = self._buffer_out[sent_bytes:]
|
||||
|
||||
def _peek_buf(self, datalen):
|
||||
"""Ensure local inbound buffer has enough data, and return that data without
|
||||
consuming the local buffer
|
||||
|
||||
It's expected that the caller handles e.g. blocking exceptions"""
|
||||
while True:
|
||||
bytes_remaining = datalen - len(self._buffer_in)
|
||||
if bytes_remaining <= 0:
|
||||
break
|
||||
data = self._sock.recv(bytes_remaining)
|
||||
if not data:
|
||||
break
|
||||
self._buffer_in = self._buffer_in + data
|
||||
|
||||
return self._buffer_in[:datalen]
|
||||
|
||||
def _read_buf(self, datalen):
|
||||
"""Read and consume bytes from socket connection
|
||||
|
||||
It's expected that the caller handles e.g. blocking exceptions"""
|
||||
buf = self._peek_buf(datalen)
|
||||
if buf:
|
||||
self._buffer_in = self._buffer_in[len(buf):]
|
||||
return buf
|
||||
|
||||
def connect_ex(self, addr):
|
||||
"""Runs a state machine through connection to authentication to
|
||||
proxy connection request.
|
||||
|
||||
The somewhat strange setup is to facilitate non-intrusive use from
|
||||
BrokerConnection state machine.
|
||||
|
||||
This function is called with a socket in non-blocking mode. Both
|
||||
send and receive calls can return in EWOULDBLOCK/EAGAIN which we
|
||||
specifically avoid handling here. These are handled in main
|
||||
BrokerConnection connection loop, which then would retry calls
|
||||
to this function."""
|
||||
|
||||
if self._state == ProxyConnectionStates.DISCONNECTED:
|
||||
self._state = ProxyConnectionStates.CONNECTING
|
||||
|
||||
if self._state == ProxyConnectionStates.CONNECTING:
|
||||
_, _, _, _, sockaddr = self._proxy_addr
|
||||
ret = self._sock.connect_ex(sockaddr)
|
||||
if not ret or ret == errno.EISCONN:
|
||||
self._state = ProxyConnectionStates.NEGOTIATE_PROPOSE
|
||||
else:
|
||||
return ret
|
||||
|
||||
if self._state == ProxyConnectionStates.NEGOTIATE_PROPOSE:
|
||||
if self._proxy_url.username and self._proxy_url.password:
|
||||
# Propose username/password
|
||||
self._buffer_out = b"\x05\x01\x02"
|
||||
else:
|
||||
# Propose no auth
|
||||
self._buffer_out = b"\x05\x01\x00"
|
||||
self._state = ProxyConnectionStates.NEGOTIATING
|
||||
|
||||
if self._state == ProxyConnectionStates.NEGOTIATING:
|
||||
self._flush_buf()
|
||||
buf = self._read_buf(2)
|
||||
if buf[0:1] != b"\x05":
|
||||
log.error("Unrecognized SOCKS version")
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
|
||||
if buf[1:2] == b"\x00":
|
||||
# No authentication required
|
||||
self._state = ProxyConnectionStates.REQUEST_SUBMIT
|
||||
elif buf[1:2] == b"\x02":
|
||||
# Username/password authentication selected
|
||||
userlen = len(self._proxy_url.username)
|
||||
passlen = len(self._proxy_url.password)
|
||||
self._buffer_out = struct.pack(
|
||||
"!bb{}sb{}s".format(userlen, passlen),
|
||||
1, # version
|
||||
userlen,
|
||||
self._proxy_url.username.encode(),
|
||||
passlen,
|
||||
self._proxy_url.password.encode(),
|
||||
)
|
||||
self._state = ProxyConnectionStates.AUTHENTICATING
|
||||
else:
|
||||
log.error("Unrecognized SOCKS authentication method")
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
|
||||
if self._state == ProxyConnectionStates.AUTHENTICATING:
|
||||
self._flush_buf()
|
||||
buf = self._read_buf(2)
|
||||
if buf == b"\x01\x00":
|
||||
# Authentication succesful
|
||||
self._state = ProxyConnectionStates.REQUEST_SUBMIT
|
||||
else:
|
||||
log.error("Socks5 proxy authentication failure")
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
|
||||
if self._state == ProxyConnectionStates.REQUEST_SUBMIT:
|
||||
if self._target_afi == socket.AF_INET:
|
||||
addr_type = 1
|
||||
addr_len = 4
|
||||
elif self._target_afi == socket.AF_INET6:
|
||||
addr_type = 4
|
||||
addr_len = 16
|
||||
else:
|
||||
log.error("Unknown address family, %r", self._target_afi)
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
|
||||
self._buffer_out = struct.pack(
|
||||
"!bbbb{}sh".format(addr_len),
|
||||
5, # version
|
||||
1, # command: connect
|
||||
0, # reserved
|
||||
addr_type, # 1 for ipv4, 4 for ipv6 address
|
||||
socket.inet_pton(self._target_afi, addr[0]), # either 4 or 16 bytes of actual address
|
||||
addr[1], # port
|
||||
)
|
||||
self._state = ProxyConnectionStates.REQUESTING
|
||||
|
||||
if self._state == ProxyConnectionStates.REQUESTING:
|
||||
self._flush_buf()
|
||||
buf = self._read_buf(2)
|
||||
if buf[0:2] == b"\x05\x00":
|
||||
self._state = ProxyConnectionStates.READ_ADDRESS
|
||||
else:
|
||||
log.error("Proxy request failed: %r", buf[1:2])
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
|
||||
if self._state == ProxyConnectionStates.READ_ADDRESS:
|
||||
# we don't really care about the remote endpoint address, but need to clear the stream
|
||||
buf = self._peek_buf(2)
|
||||
if buf[0:2] == b"\x00\x01":
|
||||
_ = self._read_buf(2 + 4 + 2) # ipv4 address + port
|
||||
elif buf[0:2] == b"\x00\x05":
|
||||
_ = self._read_buf(2 + 16 + 2) # ipv6 address + port
|
||||
else:
|
||||
log.error("Unrecognized remote address type %r", buf[1:2])
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
self._state = ProxyConnectionStates.COMPLETE
|
||||
|
||||
if self._state == ProxyConnectionStates.COMPLETE:
|
||||
return 0
|
||||
|
||||
# not reached;
|
||||
# Send and recv will raise socket error on EWOULDBLOCK/EAGAIN that is assumed to be handled by
|
||||
# the caller. The caller re-enters this state machine from retry logic with timer or via select & family
|
||||
log.error("Internal error, state %r not handled correctly", self._state)
|
||||
self._state = ProxyConnectionStates.DISCONNECTED
|
||||
if self._sock:
|
||||
self._sock.close()
|
||||
return errno.ECONNREFUSED
|
||||
@@ -42,7 +42,7 @@ Keyword Arguments:
|
||||
this partition metadata.
|
||||
"""
|
||||
PartitionMetadata = namedtuple("PartitionMetadata",
|
||||
["topic", "partition", "leader", "leader_epoch", "replicas", "isr", "offline_replicas", "error"])
|
||||
["topic", "partition", "leader", "replicas", "isr", "error"])
|
||||
|
||||
|
||||
"""The Kafka offset commit API
|
||||
@@ -55,10 +55,10 @@ what time the commit was made, etc.
|
||||
Keyword Arguments:
|
||||
offset (int): The offset to be committed
|
||||
metadata (str): Non-null metadata
|
||||
leader_epoch (int): The last known epoch from the leader / broker
|
||||
"""
|
||||
OffsetAndMetadata = namedtuple("OffsetAndMetadata",
|
||||
["offset", "metadata", "leader_epoch"])
|
||||
# TODO add leaderEpoch: OffsetAndMetadata(offset, leaderEpoch, metadata)
|
||||
["offset", "metadata"])
|
||||
|
||||
|
||||
"""An offset and timestamp tuple
|
||||
@@ -66,10 +66,9 @@ OffsetAndMetadata = namedtuple("OffsetAndMetadata",
|
||||
Keyword Arguments:
|
||||
offset (int): An offset
|
||||
timestamp (int): The timestamp associated to the offset
|
||||
leader_epoch (int): The last known epoch from the leader / broker
|
||||
"""
|
||||
OffsetAndTimestamp = namedtuple("OffsetAndTimestamp",
|
||||
["offset", "timestamp", "leader_epoch"])
|
||||
["offset", "timestamp"])
|
||||
|
||||
MemberInformation = namedtuple("MemberInformation",
|
||||
["member_id", "client_id", "client_host", "member_metadata", "member_assignment"])
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
from __future__ import absolute_import, division
|
||||
from __future__ import absolute_import
|
||||
|
||||
import binascii
|
||||
import functools
|
||||
import re
|
||||
import time
|
||||
import weakref
|
||||
|
||||
from kafka.errors import KafkaTimeoutError
|
||||
from kafka.vendor import six
|
||||
|
||||
|
||||
@@ -23,69 +19,7 @@ if six.PY3:
|
||||
crc -= TO_SIGNED
|
||||
return crc
|
||||
else:
|
||||
from binascii import crc32 # noqa: F401
|
||||
|
||||
|
||||
class Timer:
|
||||
__slots__ = ('_start_at', '_expire_at', '_timeout_ms', '_error_message')
|
||||
|
||||
def __init__(self, timeout_ms, error_message=None, start_at=None):
|
||||
self._timeout_ms = timeout_ms
|
||||
self._start_at = start_at or time.time()
|
||||
if timeout_ms is not None:
|
||||
self._expire_at = self._start_at + timeout_ms / 1000
|
||||
else:
|
||||
self._expire_at = float('inf')
|
||||
self._error_message = error_message
|
||||
|
||||
@property
|
||||
def expired(self):
|
||||
return time.time() >= self._expire_at
|
||||
|
||||
@property
|
||||
def timeout_ms(self):
|
||||
if self._timeout_ms is None:
|
||||
return None
|
||||
elif self._expire_at == float('inf'):
|
||||
return float('inf')
|
||||
remaining = self._expire_at - time.time()
|
||||
if remaining < 0:
|
||||
return 0
|
||||
else:
|
||||
return int(remaining * 1000)
|
||||
|
||||
@property
|
||||
def elapsed_ms(self):
|
||||
return int(1000 * (time.time() - self._start_at))
|
||||
|
||||
def maybe_raise(self):
|
||||
if self.expired:
|
||||
raise KafkaTimeoutError(self._error_message)
|
||||
|
||||
def __str__(self):
|
||||
return "Timer(%s ms remaining)" % (self.timeout_ms)
|
||||
|
||||
# Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29
|
||||
TOPIC_MAX_LENGTH = 249
|
||||
TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$')
|
||||
|
||||
def ensure_valid_topic_name(topic):
|
||||
""" Ensures that the topic name is valid according to the kafka source. """
|
||||
|
||||
# See Kafka Source:
|
||||
# https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
|
||||
if topic is None:
|
||||
raise TypeError('All topics must not be None')
|
||||
if not isinstance(topic, six.string_types):
|
||||
raise TypeError('All topics must be strings')
|
||||
if len(topic) == 0:
|
||||
raise ValueError('All topics must be non-empty strings')
|
||||
if topic == '.' or topic == '..':
|
||||
raise ValueError('Topic name cannot be "." or ".."')
|
||||
if len(topic) > TOPIC_MAX_LENGTH:
|
||||
raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(TOPIC_MAX_LENGTH, topic))
|
||||
if not TOPIC_LEGAL_CHARS.match(topic):
|
||||
raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic))
|
||||
from binascii import crc32
|
||||
|
||||
|
||||
class WeakMethod(object):
|
||||
@@ -130,11 +64,3 @@ class Dict(dict):
|
||||
See: https://docs.python.org/2/library/weakref.html
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def synchronized(func):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
with self._lock:
|
||||
return func(self, *args, **kwargs)
|
||||
functools.update_wrapper(wrapper, func)
|
||||
return wrapper
|
||||
|
||||
@@ -15,11 +15,7 @@ The following code adapted from trollius.selectors.
|
||||
from __future__ import absolute_import
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import namedtuple
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
from collections import namedtuple, Mapping
|
||||
from errno import EINTR
|
||||
import math
|
||||
import select
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# pylint: skip-file
|
||||
|
||||
# Copyright (c) 2010-2020 Benjamin Peterson
|
||||
# Copyright (c) 2010-2017 Benjamin Peterson
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -31,7 +31,7 @@ import sys
|
||||
import types
|
||||
|
||||
__author__ = "Benjamin Peterson <benjamin@python.org>"
|
||||
__version__ = "1.16.0"
|
||||
__version__ = "1.11.0"
|
||||
|
||||
|
||||
# Useful for very coarse version differentiation.
|
||||
@@ -77,11 +77,6 @@ else:
|
||||
# https://github.com/dpkp/kafka-python/pull/979#discussion_r100403389
|
||||
# del X
|
||||
|
||||
if PY34:
|
||||
from importlib.util import spec_from_loader
|
||||
else:
|
||||
spec_from_loader = None
|
||||
|
||||
|
||||
def _add_doc(func, doc):
|
||||
"""Add documentation to a function."""
|
||||
@@ -197,11 +192,6 @@ class _SixMetaPathImporter(object):
|
||||
return self
|
||||
return None
|
||||
|
||||
def find_spec(self, fullname, path, target=None):
|
||||
if fullname in self.known_modules:
|
||||
return spec_from_loader(fullname, self)
|
||||
return None
|
||||
|
||||
def __get_module(self, fullname):
|
||||
try:
|
||||
return self.known_modules[fullname]
|
||||
@@ -239,12 +229,6 @@ class _SixMetaPathImporter(object):
|
||||
return None
|
||||
get_source = get_code # same as get_code
|
||||
|
||||
def create_module(self, spec):
|
||||
return self.load_module(spec.name)
|
||||
|
||||
def exec_module(self, module):
|
||||
pass
|
||||
|
||||
_importer = _SixMetaPathImporter(__name__)
|
||||
|
||||
|
||||
@@ -269,7 +253,7 @@ _moved_attributes = [
|
||||
MovedAttribute("reduce", "__builtin__", "functools"),
|
||||
MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
|
||||
MovedAttribute("StringIO", "StringIO", "io"),
|
||||
MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"),
|
||||
MovedAttribute("UserDict", "UserDict", "collections"),
|
||||
MovedAttribute("UserList", "UserList", "collections"),
|
||||
MovedAttribute("UserString", "UserString", "collections"),
|
||||
MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
|
||||
@@ -277,11 +261,9 @@ _moved_attributes = [
|
||||
MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
|
||||
MovedModule("builtins", "__builtin__"),
|
||||
MovedModule("configparser", "ConfigParser"),
|
||||
MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"),
|
||||
MovedModule("copyreg", "copy_reg"),
|
||||
MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
|
||||
MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"),
|
||||
MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"),
|
||||
MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
|
||||
MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
|
||||
MovedModule("http_cookies", "Cookie", "http.cookies"),
|
||||
MovedModule("html_entities", "htmlentitydefs", "html.entities"),
|
||||
@@ -661,16 +643,13 @@ if PY3:
|
||||
import io
|
||||
StringIO = io.StringIO
|
||||
BytesIO = io.BytesIO
|
||||
del io
|
||||
_assertCountEqual = "assertCountEqual"
|
||||
if sys.version_info[1] <= 1:
|
||||
_assertRaisesRegex = "assertRaisesRegexp"
|
||||
_assertRegex = "assertRegexpMatches"
|
||||
_assertNotRegex = "assertNotRegexpMatches"
|
||||
else:
|
||||
_assertRaisesRegex = "assertRaisesRegex"
|
||||
_assertRegex = "assertRegex"
|
||||
_assertNotRegex = "assertNotRegex"
|
||||
else:
|
||||
def b(s):
|
||||
return s
|
||||
@@ -692,7 +671,6 @@ else:
|
||||
_assertCountEqual = "assertItemsEqual"
|
||||
_assertRaisesRegex = "assertRaisesRegexp"
|
||||
_assertRegex = "assertRegexpMatches"
|
||||
_assertNotRegex = "assertNotRegexpMatches"
|
||||
_add_doc(b, """Byte literal""")
|
||||
_add_doc(u, """Text literal""")
|
||||
|
||||
@@ -709,10 +687,6 @@ def assertRegex(self, *args, **kwargs):
|
||||
return getattr(self, _assertRegex)(*args, **kwargs)
|
||||
|
||||
|
||||
def assertNotRegex(self, *args, **kwargs):
|
||||
return getattr(self, _assertNotRegex)(*args, **kwargs)
|
||||
|
||||
|
||||
if PY3:
|
||||
exec_ = getattr(moves.builtins, "exec")
|
||||
|
||||
@@ -748,7 +722,16 @@ else:
|
||||
""")
|
||||
|
||||
|
||||
if sys.version_info[:2] > (3,):
|
||||
if sys.version_info[:2] == (3, 2):
|
||||
exec_("""def raise_from(value, from_value):
|
||||
try:
|
||||
if from_value is None:
|
||||
raise value
|
||||
raise value from from_value
|
||||
finally:
|
||||
value = None
|
||||
""")
|
||||
elif sys.version_info[:2] > (3, 2):
|
||||
exec_("""def raise_from(value, from_value):
|
||||
try:
|
||||
raise value from from_value
|
||||
@@ -828,33 +811,13 @@ if sys.version_info[:2] < (3, 3):
|
||||
_add_doc(reraise, """Reraise an exception.""")
|
||||
|
||||
if sys.version_info[0:2] < (3, 4):
|
||||
# This does exactly the same what the :func:`py3:functools.update_wrapper`
|
||||
# function does on Python versions after 3.2. It sets the ``__wrapped__``
|
||||
# attribute on ``wrapper`` object and it doesn't raise an error if any of
|
||||
# the attributes mentioned in ``assigned`` and ``updated`` are missing on
|
||||
# ``wrapped`` object.
|
||||
def _update_wrapper(wrapper, wrapped,
|
||||
assigned=functools.WRAPPER_ASSIGNMENTS,
|
||||
updated=functools.WRAPPER_UPDATES):
|
||||
for attr in assigned:
|
||||
try:
|
||||
value = getattr(wrapped, attr)
|
||||
except AttributeError:
|
||||
continue
|
||||
else:
|
||||
setattr(wrapper, attr, value)
|
||||
for attr in updated:
|
||||
getattr(wrapper, attr).update(getattr(wrapped, attr, {}))
|
||||
wrapper.__wrapped__ = wrapped
|
||||
return wrapper
|
||||
_update_wrapper.__doc__ = functools.update_wrapper.__doc__
|
||||
|
||||
def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
|
||||
updated=functools.WRAPPER_UPDATES):
|
||||
return functools.partial(_update_wrapper, wrapped=wrapped,
|
||||
assigned=assigned, updated=updated)
|
||||
wraps.__doc__ = functools.wraps.__doc__
|
||||
|
||||
def wrapper(f):
|
||||
f = functools.wraps(wrapped, assigned, updated)(f)
|
||||
f.__wrapped__ = wrapped
|
||||
return f
|
||||
return wrapper
|
||||
else:
|
||||
wraps = functools.wraps
|
||||
|
||||
@@ -867,15 +830,7 @@ def with_metaclass(meta, *bases):
|
||||
class metaclass(type):
|
||||
|
||||
def __new__(cls, name, this_bases, d):
|
||||
if sys.version_info[:2] >= (3, 7):
|
||||
# This version introduced PEP 560 that requires a bit
|
||||
# of extra care (we mimic what is done by __build_class__).
|
||||
resolved_bases = types.resolve_bases(bases)
|
||||
if resolved_bases is not bases:
|
||||
d['__orig_bases__'] = bases
|
||||
else:
|
||||
resolved_bases = bases
|
||||
return meta(name, resolved_bases, d)
|
||||
return meta(name, bases, d)
|
||||
|
||||
@classmethod
|
||||
def __prepare__(cls, name, this_bases):
|
||||
@@ -895,75 +850,13 @@ def add_metaclass(metaclass):
|
||||
orig_vars.pop(slots_var)
|
||||
orig_vars.pop('__dict__', None)
|
||||
orig_vars.pop('__weakref__', None)
|
||||
if hasattr(cls, '__qualname__'):
|
||||
orig_vars['__qualname__'] = cls.__qualname__
|
||||
return metaclass(cls.__name__, cls.__bases__, orig_vars)
|
||||
return wrapper
|
||||
|
||||
|
||||
def ensure_binary(s, encoding='utf-8', errors='strict'):
|
||||
"""Coerce **s** to six.binary_type.
|
||||
|
||||
For Python 2:
|
||||
- `unicode` -> encoded to `str`
|
||||
- `str` -> `str`
|
||||
|
||||
For Python 3:
|
||||
- `str` -> encoded to `bytes`
|
||||
- `bytes` -> `bytes`
|
||||
"""
|
||||
if isinstance(s, binary_type):
|
||||
return s
|
||||
if isinstance(s, text_type):
|
||||
return s.encode(encoding, errors)
|
||||
raise TypeError("not expecting type '%s'" % type(s))
|
||||
|
||||
|
||||
def ensure_str(s, encoding='utf-8', errors='strict'):
|
||||
"""Coerce *s* to `str`.
|
||||
|
||||
For Python 2:
|
||||
- `unicode` -> encoded to `str`
|
||||
- `str` -> `str`
|
||||
|
||||
For Python 3:
|
||||
- `str` -> `str`
|
||||
- `bytes` -> decoded to `str`
|
||||
"""
|
||||
# Optimization: Fast return for the common case.
|
||||
if type(s) is str:
|
||||
return s
|
||||
if PY2 and isinstance(s, text_type):
|
||||
return s.encode(encoding, errors)
|
||||
elif PY3 and isinstance(s, binary_type):
|
||||
return s.decode(encoding, errors)
|
||||
elif not isinstance(s, (text_type, binary_type)):
|
||||
raise TypeError("not expecting type '%s'" % type(s))
|
||||
return s
|
||||
|
||||
|
||||
def ensure_text(s, encoding='utf-8', errors='strict'):
|
||||
"""Coerce *s* to six.text_type.
|
||||
|
||||
For Python 2:
|
||||
- `unicode` -> `unicode`
|
||||
- `str` -> `unicode`
|
||||
|
||||
For Python 3:
|
||||
- `str` -> `str`
|
||||
- `bytes` -> decoded to `str`
|
||||
"""
|
||||
if isinstance(s, binary_type):
|
||||
return s.decode(encoding, errors)
|
||||
elif isinstance(s, text_type):
|
||||
return s
|
||||
else:
|
||||
raise TypeError("not expecting type '%s'" % type(s))
|
||||
|
||||
|
||||
def python_2_unicode_compatible(klass):
|
||||
"""
|
||||
A class decorator that defines __unicode__ and __str__ methods under Python 2.
|
||||
A decorator that defines __unicode__ and __str__ methods under Python 2.
|
||||
Under Python 3 it does nothing.
|
||||
|
||||
To support Python 2 and 3 with a single code base, define a __str__ method
|
||||
|
||||
@@ -53,23 +53,6 @@ if not hasattr(socket, "socketpair"):
|
||||
raise
|
||||
finally:
|
||||
lsock.close()
|
||||
|
||||
# Authenticating avoids using a connection from something else
|
||||
# able to connect to {host}:{port} instead of us.
|
||||
# We expect only AF_INET and AF_INET6 families.
|
||||
try:
|
||||
if (
|
||||
ssock.getsockname() != csock.getpeername()
|
||||
or csock.getsockname() != ssock.getpeername()
|
||||
):
|
||||
raise ConnectionError("Unexpected peer connection")
|
||||
except:
|
||||
# getsockname() and getpeername() can fail
|
||||
# if either socket isn't connected.
|
||||
ssock.close()
|
||||
csock.close()
|
||||
raise
|
||||
|
||||
return (ssock, csock)
|
||||
|
||||
socket.socketpair = socketpair
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '2.2.15'
|
||||
__version__ = '2.0.2'
|
||||
|
||||
Reference in New Issue
Block a user