@@ -0,0 +1,142 @@
#!/usr/bin/env python
# Adapted from https://github.com/mrafayaleem/kafka-jython

from __future__ import absolute_import, print_function

import argparse
import pprint
import sys
import threading
import time
import traceback

from kafka import KafkaConsumer


class ConsumerPerformance(object):
    @staticmethod
    def run(args):
        try:
            props = {}
            for prop in args.consumer_config:
                k, v = prop.split('=')
                try:
                    v = int(v)
                except ValueError:
                    pass
                if v == 'None':
                    v = None
                elif v == 'False':
                    v = False
                elif v == 'True':
                    v = True
                props[k] = v

            print('Initializing Consumer...')
            props['bootstrap_servers'] = args.bootstrap_servers
            props['auto_offset_reset'] = 'earliest'
            if 'group_id' not in props:
                props['group_id'] = 'kafka-consumer-benchmark'
            if 'consumer_timeout_ms' not in props:
                props['consumer_timeout_ms'] = 10000
            props['metrics_sample_window_ms'] = args.stats_interval * 1000
            for k, v in props.items():
                print('---> {0}={1}'.format(k, v))
            consumer = KafkaConsumer(args.topic, **props)
            print('---> group_id={0}'.format(consumer.config['group_id']))
            print('---> report stats every {0} secs'.format(args.stats_interval))
            print('---> raw metrics? {0}'.format(args.raw_metrics))
            timer_stop = threading.Event()
            timer = StatsReporter(args.stats_interval, consumer,
                                  event=timer_stop,
                                  raw_metrics=args.raw_metrics)
            timer.start()
            print('-> OK!')
            print()

            start_time = time.time()
            records = 0
            for msg in consumer:
                records += 1
                if records >= args.num_records:
                    break

            end_time = time.time()
            timer_stop.set()
            timer.join()
            print('Consumed {0} records'.format(records))
            print('Execution time:', end_time - start_time, 'secs')

        except Exception:
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
            sys.exit(1)


class StatsReporter(threading.Thread):
    def __init__(self, interval, consumer, event=None, raw_metrics=False):
        super(StatsReporter, self).__init__()
        self.interval = interval
        self.consumer = consumer
        self.event = event
        self.raw_metrics = raw_metrics

    def print_stats(self):
        metrics = self.consumer.metrics()
        if self.raw_metrics:
            pprint.pprint(metrics)
        else:
            print('{records-consumed-rate} records/sec ({bytes-consumed-rate} B/sec),'
                  ' {fetch-latency-avg} latency,'
                  ' {fetch-rate} fetch/s,'
                  ' {fetch-size-avg} fetch size,'
                  ' {records-lag-max} max record lag,'
                  ' {records-per-request-avg} records/req'
                  .format(**metrics['consumer-fetch-manager-metrics']))

    def print_final(self):
        self.print_stats()

    def run(self):
        # The while/else runs print_final() once, after the event is set
        # (or immediately, if no event was supplied).
        while self.event and not self.event.wait(self.interval):
            self.print_stats()
        else:
            self.print_final()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to verify the consumer performance.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=(),
        help='host:port for cluster bootstrap servers')
    parser.add_argument(
        '--topic', type=str,
        help='Topic for consumer test (default: kafka-python-benchmark-test)',
        default='kafka-python-benchmark-test')
    parser.add_argument(
        '--num-records', type=int,
        help='number of messages to consume (default: 1000000)',
        default=1000000)
    parser.add_argument(
        '--consumer-config', type=str, nargs='+', default=(),
        help='kafka consumer related configuration properties like '
             'bootstrap_servers, client_id etc.')
    parser.add_argument(
        '--fixture-compression', type=str,
        help='specify a compression type for use with broker fixtures / producer')
    parser.add_argument(
        '--stats-interval', type=int,
        help='Interval in seconds for stats reporting to console (default: 5)',
        default=5)
    parser.add_argument(
        '--raw-metrics', action='store_true',
        help='Enable this flag to print full metrics dict on each interval')
    return parser


if __name__ == '__main__':
    args = get_args_parser().parse_args()
    ConsumerPerformance.run(args)
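For a quick smoke run, the parser accepts an explicit argv list, so the benchmark can also be driven from this module's namespace; a minimal sketch (the broker address, record count, and interval here are illustrative values, not defaults of the file above):

# Hypothetical driver: consume 10,000 records, reporting stats every 2s.
args = get_args_parser().parse_args([
    '--bootstrap-servers', 'localhost:9092',
    '--num-records', '10000',
    '--stats-interval', '2',
])
ConsumerPerformance.run(args)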
@@ -0,0 +1,110 @@
#!/usr/bin/env python
from __future__ import print_function

import argparse
import logging
import threading
import time

from kafka import KafkaConsumer, KafkaProducer


class Producer(threading.Thread):

    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
        super(Producer, self).__init__()
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        self.stop_event = stop_event
        self.big_msg = b'1' * msg_size

    def run(self):
        producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers)
        self.sent = 0

        while not self.stop_event.is_set():
            producer.send(self.topic, self.big_msg)
            self.sent += 1
        producer.flush()
        producer.close()


class Consumer(threading.Thread):
    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
        super(Consumer, self).__init__()
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        self.stop_event = stop_event
        self.msg_size = msg_size

    def run(self):
        consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                 auto_offset_reset='earliest')
        consumer.subscribe([self.topic])
        self.valid = 0
        self.invalid = 0

        for message in consumer:
            if len(message.value) == self.msg_size:
                self.valid += 1
            else:
                print('Invalid message:', len(message.value), self.msg_size)
                self.invalid += 1

            if self.stop_event.is_set():
                break
        consumer.close()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to demonstrate consumer and producer load.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=['localhost:9092'],
        help='host:port for cluster bootstrap servers (default: localhost:9092)')
    parser.add_argument(
        '--topic', type=str,
        help='Topic for load test (default: kafka-python-benchmark-load-example)',
        default='kafka-python-benchmark-load-example')
    parser.add_argument(
        '--msg-size', type=int,
        help='Message size, in bytes, for load test (default: 524288)',
        default=524288)
    parser.add_argument(
        '--load-time', type=int,
        help='number of seconds to run load test (default: 10)',
        default=10)
    parser.add_argument(
        '--log-level', type=str,
        help='Optional logging level for load test: ERROR|INFO|DEBUG etc',
        default=None)
    return parser


def main(args):
    if args.log_level:
        logging.basicConfig(
            format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
            level=getattr(logging, args.log_level))
    producer_stop = threading.Event()
    consumer_stop = threading.Event()
    threads = [
        Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size),
        Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size)
    ]

    for t in threads:
        t.start()

    time.sleep(args.load_time)
    producer_stop.set()
    consumer_stop.set()
    print('Messages sent: %d' % threads[0].sent)
    print('Messages recvd: %d' % threads[1].valid)
    print('Messages invalid: %d' % threads[1].invalid)


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
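One caveat in main() above: threads[0].sent is read right after the stop event is set, while the producer thread may still be mid-increment or flushing. A variant that joins the thread before reading gives a stable count; a sketch using the classes above (broker address, topic, size, and duration are illustrative):

# Join-before-read variant of the shutdown in main() (illustrative sketch).
producer_stop = threading.Event()
p = Producer(['localhost:9092'], 'kafka-python-benchmark-load-example',
             producer_stop, 1024)
p.start()
time.sleep(3)
producer_stop.set()
p.join()  # wait for the last send, flush() and close()
print('Messages sent: %d' % p.sent)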
@@ -0,0 +1,153 @@
#!/usr/bin/env python
# Adapted from https://github.com/mrafayaleem/kafka-jython

from __future__ import absolute_import, print_function

import argparse
import pprint
import sys
import threading
import time
import traceback

from kafka.vendor.six.moves import range

from kafka import KafkaProducer


class ProducerPerformance(object):
    @staticmethod
    def run(args):
        try:
            props = {}
            for prop in args.producer_config:
                k, v = prop.split('=')
                try:
                    v = int(v)
                except ValueError:
                    pass
                if v == 'None':
                    v = None
                elif v == 'False':
                    v = False
                elif v == 'True':
                    v = True
                props[k] = v

            print('Initializing producer...')
            props['bootstrap_servers'] = args.bootstrap_servers
            record = bytes(bytearray(args.record_size))
            props['metrics_sample_window_ms'] = args.stats_interval * 1000

            producer = KafkaProducer(**props)
            for k, v in props.items():
                print('---> {0}={1}'.format(k, v))
            print('---> send {0} byte records'.format(args.record_size))
            print('---> report stats every {0} secs'.format(args.stats_interval))
            print('---> raw metrics? {0}'.format(args.raw_metrics))
            timer_stop = threading.Event()
            timer = StatsReporter(args.stats_interval, producer,
                                  event=timer_stop,
                                  raw_metrics=args.raw_metrics)
            timer.start()
            print('-> OK!')
            print()

            def _benchmark():
                results = []
                for i in range(args.num_records):
                    results.append(producer.send(topic=args.topic, value=record))
                print("Send complete...")
                producer.flush()
                producer.close()
                count_success, count_failure = 0, 0
                for r in results:
                    if r.succeeded():
                        count_success += 1
                    elif r.failed():
                        count_failure += 1
                    else:
                        raise ValueError(r)
                print("%d succeeded, %d failed" % (count_success, count_failure))

            start_time = time.time()
            _benchmark()
            end_time = time.time()
            timer_stop.set()
            timer.join()
            print('Execution time:', end_time - start_time, 'secs')

        except Exception:
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
            sys.exit(1)


class StatsReporter(threading.Thread):
    def __init__(self, interval, producer, event=None, raw_metrics=False):
        super(StatsReporter, self).__init__()
        self.interval = interval
        self.producer = producer
        self.event = event
        self.raw_metrics = raw_metrics

    def print_stats(self):
        metrics = self.producer.metrics()
        if not metrics:
            return
        if self.raw_metrics:
            pprint.pprint(metrics)
        else:
            print('{record-send-rate} records/sec ({byte-rate} B/sec),'
                  ' {request-latency-avg} latency,'
                  ' {record-size-avg} record size,'
                  ' {batch-size-avg} batch size,'
                  ' {records-per-request-avg} records/req'
                  .format(**metrics['producer-metrics']))

    def print_final(self):
        self.print_stats()

    def run(self):
        while self.event and not self.event.wait(self.interval):
            self.print_stats()
        else:
            self.print_final()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to verify the producer performance.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=(),
        help='host:port for cluster bootstrap servers')
    parser.add_argument(
        '--topic', type=str,
        help='Topic name for test (default: kafka-python-benchmark-test)',
        default='kafka-python-benchmark-test')
    parser.add_argument(
        '--num-records', type=int,
        help='number of messages to produce (default: 1000000)',
        default=1000000)
    parser.add_argument(
        '--record-size', type=int,
        help='message size in bytes (default: 100)',
        default=100)
    parser.add_argument(
        '--producer-config', type=str, nargs='+', default=(),
        help='kafka producer related configuration properties like '
             'bootstrap_servers, client_id etc.')
    parser.add_argument(
        '--stats-interval', type=int,
        help='Interval in seconds for stats reporting to console (default: 5)',
        default=5)
    parser.add_argument(
        '--raw-metrics', action='store_true',
        help='Enable this flag to print full metrics dict on each interval')
    return parser


if __name__ == '__main__':
    args = get_args_parser().parse_args()
    ProducerPerformance.run(args)
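The _benchmark closure above fires all sends asynchronously and only inspects the futures after flush(). For a single message, the same future can be resolved synchronously with get(), which kafka-python producer futures support; a sketch (the broker address and payload are illustrative):

# Resolve one send future synchronously instead of batching them up.
producer = KafkaProducer(bootstrap_servers='localhost:9092')
future = producer.send('kafka-python-benchmark-test', b'\x00' * 100)
metadata = future.get(timeout=10)  # raises on delivery failure
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()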
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
from __future__ import print_function
import hashlib
import itertools
import os
import random

import pyperf

from kafka.record.memory_records import MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

# With the values above, a v1 record is 100 bytes, so 10,000 bytes for 100 messages
MESSAGES_PER_BATCH = 100


def random_bytes(length):
    buffer = bytearray(length)
    for i in range(length):
        buffer[i] = random.randint(0, 255)
    return bytes(buffer)


def prepare():
    return iter(itertools.cycle([
        (random_bytes(KEY_SIZE),
         random_bytes(VALUE_SIZE),
         random.randint(*TIMESTAMP_RANGE))
        for _ in range(int(MESSAGES_PER_BATCH * 1.94))
    ]))


def finalize(results):
    # Just some strange code to make sure PyPy does execute the main code
    # properly, without optimizing it away
    hash_val = hashlib.md5()
    for buf in results:
        hash_val.update(buf)
    print(hash_val, file=open(os.devnull, "w"))


def func(loops, magic):
    # The JIT can optimize out the whole function if the result is the same
    # each time, so we need some randomized input data
    precomputed_samples = prepare()
    results = []

    # Main benchmark code.
    t0 = pyperf.perf_counter()
    for _ in range(loops):
        batch = MemoryRecordsBuilder(
            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
        for _ in range(MESSAGES_PER_BATCH):
            key, value, timestamp = next(precomputed_samples)
            size = batch.append(
                timestamp=timestamp, key=key, value=value)
            assert size
        batch.close()
        results.append(batch.buffer())

    res = pyperf.perf_counter() - t0

    finalize(results)

    return res


if __name__ == '__main__':
    runner = pyperf.Runner()
    runner.bench_time_func('batch_append_v0', func, 0)
    runner.bench_time_func('batch_append_v1', func, 1)
    runner.bench_time_func('batch_append_v2', func, 2)
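One iteration of the timed loop can be run standalone to see how large a 100-message batch actually is for a given magic value; a sketch reusing the constants and builder API from the file above (run in its namespace):

# Build a single v2 batch outside the timed loop and inspect its size.
samples = prepare()
builder = MemoryRecordsBuilder(2, batch_size=DEFAULT_BATCH_SIZE,
                               compression_type=0)
for _ in range(MESSAGES_PER_BATCH):
    key, value, timestamp = next(samples)
    builder.append(timestamp=timestamp, key=key, value=value)
builder.close()
print('one batch:', len(builder.buffer()), 'bytes')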
@@ -0,0 +1,83 @@
#!/usr/bin/env python
from __future__ import print_function
import hashlib
import itertools
import os
import random

import pyperf

from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

BATCH_SAMPLES = 5
MESSAGES_PER_BATCH = 100


def random_bytes(length):
    buffer = bytearray(length)
    for i in range(length):
        buffer[i] = random.randint(0, 255)
    return bytes(buffer)


def prepare(magic):
    samples = []
    for _ in range(BATCH_SAMPLES):
        batch = MemoryRecordsBuilder(
            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
        for _ in range(MESSAGES_PER_BATCH):
            size = batch.append(
                random.randint(*TIMESTAMP_RANGE),
                random_bytes(KEY_SIZE),
                random_bytes(VALUE_SIZE),
                headers=[])
            assert size
        batch.close()
        samples.append(bytes(batch.buffer()))

    return iter(itertools.cycle(samples))


def finalize(results):
    # Just some strange code to make sure PyPy does execute the code above
    # properly, without optimizing it away
    hash_val = hashlib.md5()
    for buf in results:
        hash_val.update(buf)
    print(hash_val, file=open(os.devnull, "w"))


def func(loops, magic):
    # The JIT can optimize out the whole function if the result is the same
    # each time, so we need some randomized input data
    precomputed_samples = prepare(magic)
    results = []

    # Main benchmark code.
    batch_data = next(precomputed_samples)
    t0 = pyperf.perf_counter()
    for _ in range(loops):
        records = MemoryRecords(batch_data)
        while records.has_next():
            batch = records.next_batch()
            batch.validate_crc()
            for record in batch:
                results.append(record.value)

    res = pyperf.perf_counter() - t0
    finalize(results)

    return res


if __name__ == '__main__':
    runner = pyperf.Runner()
    runner.bench_time_func('batch_read_v0', func, 0)
    runner.bench_time_func('batch_read_v1', func, 1)
    runner.bench_time_func('batch_read_v2', func, 2)
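Since this file imports both the builder and the reader, a quick roundtrip check (compose one batch, read it back) mirrors prepare() plus the timed loop and verifies that record sizes survive; a sketch run in the module's namespace:

# Roundtrip sanity check: one composed batch should yield exactly
# MESSAGES_PER_BATCH records of VALUE_SIZE bytes each.
batch_data = next(prepare(2))
records = MemoryRecords(batch_data)
count = 0
while records.has_next():
    batch = records.next_batch()
    batch.validate_crc()
    for record in batch:
        assert len(record.value) == VALUE_SIZE
        count += 1
assert count == MESSAGES_PER_BATCH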
@@ -0,0 +1,434 @@
#!/usr/bin/env python
from __future__ import print_function
import pyperf
from kafka.vendor import six


test_data = [
    (b"\x00", 0),
    (b"\x01", -1),
    (b"\x02", 1),
    (b"\x7E", 63),
    (b"\x7F", -64),
    (b"\x80\x01", 64),
    (b"\x81\x01", -65),
    (b"\xFE\x7F", 8191),
    (b"\xFF\x7F", -8192),
    (b"\x80\x80\x01", 8192),
    (b"\x81\x80\x01", -8193),
    (b"\xFE\xFF\x7F", 1048575),
    (b"\xFF\xFF\x7F", -1048576),
    (b"\x80\x80\x80\x01", 1048576),
    (b"\x81\x80\x80\x01", -1048577),
    (b"\xFE\xFF\xFF\x7F", 134217727),
    (b"\xFF\xFF\xFF\x7F", -134217728),
    (b"\x80\x80\x80\x80\x01", 134217728),
    (b"\x81\x80\x80\x80\x01", -134217729),
    (b"\xFE\xFF\xFF\xFF\x7F", 17179869183),
    (b"\xFF\xFF\xFF\xFF\x7F", -17179869184),
    (b"\x80\x80\x80\x80\x80\x01", 17179869184),
    (b"\x81\x80\x80\x80\x80\x01", -17179869185),
    (b"\xFE\xFF\xFF\xFF\xFF\x7F", 2199023255551),
    (b"\xFF\xFF\xFF\xFF\xFF\x7F", -2199023255552),
    (b"\x80\x80\x80\x80\x80\x80\x01", 2199023255552),
    (b"\x81\x80\x80\x80\x80\x80\x01", -2199023255553),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\x7F", 281474976710655),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -281474976710656),
    (b"\x80\x80\x80\x80\x80\x80\x80\x01", 281474976710656),
    (b"\x81\x80\x80\x80\x80\x80\x80\x01", -281474976710657),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 36028797018963967),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -36028797018963968),
    (b"\x80\x80\x80\x80\x80\x80\x80\x80\x01", 36028797018963968),
    (b"\x81\x80\x80\x80\x80\x80\x80\x80\x01", -36028797018963969),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 4611686018427387903),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -4611686018427387904),
    (b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 4611686018427387904),
    (b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x01", -4611686018427387905),
]
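The (encoded, decoded) pairs above are zigzag varints: the sign bit is moved into the lowest bit so small negative numbers still encode as short byte sequences. A minimal demonstration of the zigzag step used by every encoder below (the helper name is illustrative, not part of this file):

def zigzag64(n):
    # 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... (64-bit signed variant)
    return (n << 1) ^ (n >> 63)

# Matches the single-byte entries of test_data above:
assert zigzag64(0) == 0x00
assert zigzag64(-1) == 0x01
assert zigzag64(63) == 0x7E
assert zigzag64(-64) == 0x7F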
BENCH_VALUES_ENC = [
    60,  # 1 byte
    -8192,  # 2 bytes
    1048575,  # 3 bytes
    134217727,  # 4 bytes
    -17179869184,  # 5 bytes
    2199023255551,  # 6 bytes
]

BENCH_VALUES_DEC = [
    b"\x7E",  # 1 byte
    b"\xFF\x7F",  # 2 bytes
    b"\xFE\xFF\x7F",  # 3 bytes
    b"\xFF\xFF\xFF\x7F",  # 4 bytes
    b"\x80\x80\x80\x80\x01",  # 5 bytes
    b"\xFE\xFF\xFF\xFF\xFF\x7F",  # 6 bytes
]
BENCH_VALUES_DEC = list(map(bytearray, BENCH_VALUES_DEC))


def _assert_valid_enc(enc_func):
    for encoded, decoded in test_data:
        assert enc_func(decoded) == encoded, decoded


def _assert_valid_dec(dec_func):
    for encoded, decoded in test_data:
        res, pos = dec_func(bytearray(encoded))
        assert res == decoded, (decoded, res)
        assert pos == len(encoded), (decoded, pos)


def _assert_valid_size(size_func):
    for encoded, decoded in test_data:
        assert size_func(decoded) == len(encoded), decoded


def encode_varint_1(num):
    """ Encode an integer to a varint representation. See
    https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
    on how those can be produced.

    Arguments:
        num (int): Value to encode

    Returns:
        bytearray: Encoded representation of integer with length from 1 to 10
            bytes
    """
    # Shift sign to the end of number
    num = (num << 1) ^ (num >> 63)
    # Max 10 bytes. We assert those are allocated
    buf = bytearray(10)

    for i in range(10):
        # 7 lowest bits from the number and set 8th if we still have pending
        # bits left to encode
        buf[i] = num & 0x7f | (0x80 if num > 0x7f else 0)
        num = num >> 7
        if num == 0:
            break
    else:
        # Max size of an encoded varint is 10 bytes for unsigned values
        raise ValueError("Out of int64 range")
    return buf[:i + 1]
def encode_varint_2(value, int2byte=six.int2byte):
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    res = b""
    while value:
        res += int2byte(0x80 | bits)
        bits = value & 0x7f
        value >>= 7
    return res + int2byte(bits)


def encode_varint_3(value, buf):
    append = buf.append
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    while value:
        append(0x80 | bits)
        bits = value & 0x7f
        value >>= 7
    append(bits)
    return value


def encode_varint_4(value, int2byte=six.int2byte):
    value = (value << 1) ^ (value >> 63)

    if value <= 0x7f:  # 1 byte
        return int2byte(value)
    if value <= 0x3fff:  # 2 bytes
        return int2byte(0x80 | (value & 0x7f)) + int2byte(value >> 7)
    if value <= 0x1fffff:  # 3 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(value >> 14)
    if value <= 0xfffffff:  # 4 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(0x80 | ((value >> 14) & 0x7f)) + \
            int2byte(value >> 21)
    if value <= 0x7ffffffff:  # 5 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(0x80 | ((value >> 14) & 0x7f)) + \
            int2byte(0x80 | ((value >> 21) & 0x7f)) + \
            int2byte(value >> 28)
    else:
        # Fall back to the general algorithm
        bits = value & 0x7f
        value >>= 7
        res = b""
        while value:
            res += int2byte(0x80 | bits)
            bits = value & 0x7f
            value >>= 7
        return res + int2byte(bits)


def encode_varint_5(value, buf, pos=0):
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    while value:
        buf[pos] = 0x80 | bits
        bits = value & 0x7f
        value >>= 7
        pos += 1
    buf[pos] = bits
    return pos + 1


def encode_varint_6(value, buf):
    append = buf.append
    value = (value << 1) ^ (value >> 63)

    if value <= 0x7f:  # 1 byte
        append(value)
        return 1
    if value <= 0x3fff:  # 2 bytes
        append(0x80 | (value & 0x7f))
        append(value >> 7)
        return 2
    if value <= 0x1fffff:  # 3 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(value >> 14)
        return 3
    if value <= 0xfffffff:  # 4 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(0x80 | ((value >> 14) & 0x7f))
        append(value >> 21)
        return 4
    if value <= 0x7ffffffff:  # 5 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(0x80 | ((value >> 14) & 0x7f))
        append(0x80 | ((value >> 21) & 0x7f))
        append(value >> 28)
        return 5
    else:
        # Fall back to the general algorithm
        bits = value & 0x7f
        value >>= 7
        i = 0
        while value:
            append(0x80 | bits)
            bits = value & 0x7f
            value >>= 7
            i += 1
        append(bits)
        # The loop appends i bytes plus the final one, so i + 1 in total
        return i + 1


def size_of_varint_1(value):
    """ Number of bytes needed to encode an integer in variable-length format.
    """
    value = (value << 1) ^ (value >> 63)
    res = 0
    while True:
        res += 1
        value = value >> 7
        if value == 0:
            break
    return res


def size_of_varint_2(value):
    """ Number of bytes needed to encode an integer in variable-length format.
    """
    value = (value << 1) ^ (value >> 63)
    if value <= 0x7f:
        return 1
    if value <= 0x3fff:
        return 2
    if value <= 0x1fffff:
        return 3
    if value <= 0xfffffff:
        return 4
    if value <= 0x7ffffffff:
        return 5
    if value <= 0x3ffffffffff:
        return 6
    if value <= 0x1ffffffffffff:
        return 7
    if value <= 0xffffffffffffff:
        return 8
    if value <= 0x7fffffffffffffff:
        return 9
    return 10


if six.PY3:
    def _read_byte(memview, pos):
        """ Read a byte from memoryview as an integer

        Raises:
            IndexError: if position is out of bounds
        """
        return memview[pos]
else:
    def _read_byte(memview, pos):
        """ Read a byte from memoryview as an integer

        Raises:
            IndexError: if position is out of bounds
        """
        return ord(memview[pos])


def decode_varint_1(buffer, pos=0):
    """ Decode an integer from a varint representation. See
    https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
    on how those can be produced.

    Arguments:
        buffer (bytes-like): any object acceptable by ``memoryview``
        pos (int): optional position to read from

    Returns:
        (int, int): Decoded int value and next read position
    """
    value = 0
    shift = 0
    memview = memoryview(buffer)
    for i in range(pos, pos + 10):
        try:
            byte = _read_byte(memview, i)
        except IndexError:
            raise ValueError("End of byte stream")
        if byte & 0x80 != 0:
            value |= (byte & 0x7f) << shift
            shift += 7
        else:
            value |= byte << shift
            break
    else:
        # Max size of an encoded varint is 10 bytes for unsigned values
        raise ValueError("Out of int64 range")
    # Normalize sign
    return (value >> 1) ^ -(value & 1), i + 1
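A worked example of decode_varint_1 on the two-byte pair (b"\xFE\x7F", 8191) from test_data: the first byte 0xFE has the continuation bit set and contributes its low bits 0x7E; the second byte 0x7F has no continuation bit and contributes 0x7F << 7, giving 0x3FFE; the final zigzag step (0x3FFE >> 1) ^ -(0x3FFE & 1) recovers 8191.

# Verifies the walkthrough above; the returned position is 2 (both bytes read).
value, pos = decode_varint_1(bytearray(b"\xFE\x7F"))
assert (value, pos) == (8191, 2)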
def decode_varint_2(buffer, pos=0):
    result = 0
    shift = 0
    while True:
        b = buffer[pos]
        result |= ((b & 0x7f) << shift)
        pos += 1
        if not (b & 0x80):
            return ((result >> 1) ^ -(result & 1), pos)
        shift += 7
        if shift >= 64:
            raise ValueError("Out of int64 range")


def decode_varint_3(buffer, pos=0):
    result = buffer[pos]
    if not (result & 0x81):
        return (result >> 1), pos + 1
    if not (result & 0x80):
        return (result >> 1) ^ (~0), pos + 1

    result &= 0x7f
    pos += 1
    shift = 7
    while True:
        b = buffer[pos]
        result |= ((b & 0x7f) << shift)
        pos += 1
        if not (b & 0x80):
            return ((result >> 1) ^ -(result & 1), pos)
        shift += 7
        if shift >= 64:
            raise ValueError("Out of int64 range")


if __name__ == '__main__':
    _assert_valid_enc(encode_varint_1)
    _assert_valid_enc(encode_varint_2)

    for encoded, decoded in test_data:
        res = bytearray()
        encode_varint_3(decoded, res)
        assert res == encoded

    _assert_valid_enc(encode_varint_4)

    # import dis
    # dis.dis(encode_varint_4)

    for encoded, decoded in test_data:
        res = bytearray(10)
        written = encode_varint_5(decoded, res)
        assert res[:written] == encoded

    for encoded, decoded in test_data:
        res = bytearray()
        encode_varint_6(decoded, res)
        assert res == encoded

    _assert_valid_size(size_of_varint_1)
    _assert_valid_size(size_of_varint_2)
    _assert_valid_dec(decode_varint_1)
    _assert_valid_dec(decode_varint_2)
    _assert_valid_dec(decode_varint_3)

    # import dis
    # dis.dis(decode_varint_3)

    runner = pyperf.Runner()
    # Encode algorithms returning a bytes result
    for bench_func in [
            encode_varint_1,
            encode_varint_2,
            encode_varint_4]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)

    # Encode algorithms writing to the buffer
    for bench_func in [
            encode_varint_3,
            encode_varint_5,
            encode_varint_6]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            fname = bench_func.__name__
            runner.timeit(
                '{}_{}byte'.format(fname, i + 1),
                stmt="{}({}, buffer)".format(fname, value),
                setup="from __main__ import {}; buffer = bytearray(10)".format(
                    fname)
            )

    # Size algorithms
    for bench_func in [
            size_of_varint_1,
            size_of_varint_2]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)

    # Decode algorithms
    for bench_func in [
            decode_varint_1,
            decode_varint_2,
            decode_varint_3]:
        for i, value in enumerate(BENCH_VALUES_DEC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)