@@ -0,0 +1,142 @@
#!/usr/bin/env python
# Adapted from https://github.com/mrafayaleem/kafka-jython

from __future__ import absolute_import, print_function

import argparse
import pprint
import sys
import threading
import time
import traceback

from kafka import KafkaConsumer


class ConsumerPerformance(object):
    @staticmethod
    def run(args):
        try:
            props = {}
            for prop in args.consumer_config:
                k, v = prop.split('=')
                try:
                    v = int(v)
                except ValueError:
                    pass
                if v == 'None':
                    v = None
                elif v == 'False':
                    v = False
                elif v == 'True':
                    v = True
                props[k] = v

            print('Initializing Consumer...')
            props['bootstrap_servers'] = args.bootstrap_servers
            props['auto_offset_reset'] = 'earliest'
            if 'group_id' not in props:
                props['group_id'] = 'kafka-consumer-benchmark'
            if 'consumer_timeout_ms' not in props:
                props['consumer_timeout_ms'] = 10000
            props['metrics_sample_window_ms'] = args.stats_interval * 1000
            for k, v in props.items():
                print('---> {0}={1}'.format(k, v))
            consumer = KafkaConsumer(args.topic, **props)
            print('---> group_id={0}'.format(consumer.config['group_id']))
            print('---> report stats every {0} secs'.format(args.stats_interval))
            print('---> raw metrics? {0}'.format(args.raw_metrics))
            timer_stop = threading.Event()
            timer = StatsReporter(args.stats_interval, consumer,
                                  event=timer_stop,
                                  raw_metrics=args.raw_metrics)
            timer.start()
            print('-> OK!')
            print()

            start_time = time.time()
            records = 0
            for msg in consumer:
                records += 1
                if records >= args.num_records:
                    break

            end_time = time.time()
            timer_stop.set()
            timer.join()
            print('Consumed {0} records'.format(records))
            print('Execution time:', end_time - start_time, 'secs')

        except Exception:
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
            sys.exit(1)


class StatsReporter(threading.Thread):
    def __init__(self, interval, consumer, event=None, raw_metrics=False):
        super(StatsReporter, self).__init__()
        self.interval = interval
        self.consumer = consumer
        self.event = event
        self.raw_metrics = raw_metrics

    def print_stats(self):
        metrics = self.consumer.metrics()
        if self.raw_metrics:
            pprint.pprint(metrics)
        else:
            print('{records-consumed-rate} records/sec ({bytes-consumed-rate} B/sec),'
                  ' {fetch-latency-avg} latency,'
                  ' {fetch-rate} fetch/s,'
                  ' {fetch-size-avg} fetch size,'
                  ' {records-lag-max} max record lag,'
                  ' {records-per-request-avg} records/req'
                  .format(**metrics['consumer-fetch-manager-metrics']))

    def print_final(self):
        self.print_stats()

    def run(self):
        # The while/else runs print_final() once, after the event is set
        # (or immediately, if no event was supplied).
        while self.event and not self.event.wait(self.interval):
            self.print_stats()
        else:
            self.print_final()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to verify the consumer performance.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=(),
        help='host:port for cluster bootstrap servers')
    parser.add_argument(
        '--topic', type=str,
        help='Topic for consumer test (default: kafka-python-benchmark-test)',
        default='kafka-python-benchmark-test')
    parser.add_argument(
        '--num-records', type=int,
        help='number of messages to consume (default: 1000000)',
        default=1000000)
    parser.add_argument(
        '--consumer-config', type=str, nargs='+', default=(),
        help='kafka consumer related configuration properties like '
             'bootstrap_servers, client_id etc.')
    parser.add_argument(
        '--fixture-compression', type=str,
        help='specify a compression type for use with broker fixtures / producer')
    parser.add_argument(
        '--stats-interval', type=int,
        help='Interval in seconds for stats reporting to console (default: 5)',
        default=5)
    parser.add_argument(
        '--raw-metrics', action='store_true',
        help='Enable this flag to print full metrics dict on each interval')
    return parser


if __name__ == '__main__':
    args = get_args_parser().parse_args()
    ConsumerPerformance.run(args)
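For a quick smoke run, the parser accepts an explicit argv list, so the benchmark can also be driven from this module's namespace; a minimal sketch (the broker address, record count, and interval here are illustrative values, not defaults of the file above):

# Hypothetical driver: consume 10,000 records, reporting stats every 2s.
args = get_args_parser().parse_args([
    '--bootstrap-servers', 'localhost:9092',
    '--num-records', '10000',
    '--stats-interval', '2',
])
ConsumerPerformance.run(args)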
@@ -0,0 +1,110 @@
#!/usr/bin/env python
from __future__ import print_function

import argparse
import logging
import threading
import time

from kafka import KafkaConsumer, KafkaProducer


class Producer(threading.Thread):

    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
        super(Producer, self).__init__()
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        self.stop_event = stop_event
        self.big_msg = b'1' * msg_size

    def run(self):
        producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers)
        self.sent = 0

        while not self.stop_event.is_set():
            producer.send(self.topic, self.big_msg)
            self.sent += 1
        producer.flush()
        producer.close()


class Consumer(threading.Thread):
    def __init__(self, bootstrap_servers, topic, stop_event, msg_size):
        super(Consumer, self).__init__()
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        self.stop_event = stop_event
        self.msg_size = msg_size

    def run(self):
        consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                 auto_offset_reset='earliest')
        consumer.subscribe([self.topic])
        self.valid = 0
        self.invalid = 0

        for message in consumer:
            if len(message.value) == self.msg_size:
                self.valid += 1
            else:
                print('Invalid message:', len(message.value), self.msg_size)
                self.invalid += 1

            if self.stop_event.is_set():
                break
        consumer.close()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to demonstrate consumer and producer load.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=['localhost:9092'],
        help='host:port for cluster bootstrap servers (default: localhost:9092)')
    parser.add_argument(
        '--topic', type=str,
        help='Topic for load test (default: kafka-python-benchmark-load-example)',
        default='kafka-python-benchmark-load-example')
    parser.add_argument(
        '--msg-size', type=int,
        help='Message size, in bytes, for load test (default: 524288)',
        default=524288)
    parser.add_argument(
        '--load-time', type=int,
        help='number of seconds to run load test (default: 10)',
        default=10)
    parser.add_argument(
        '--log-level', type=str,
        help='Optional logging level for load test: ERROR|INFO|DEBUG etc',
        default=None)
    return parser


def main(args):
    if args.log_level:
        logging.basicConfig(
            format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
            level=getattr(logging, args.log_level))
    producer_stop = threading.Event()
    consumer_stop = threading.Event()
    threads = [
        Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size),
        Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size)
    ]

    for t in threads:
        t.start()

    time.sleep(args.load_time)
    producer_stop.set()
    consumer_stop.set()
    print('Messages sent: %d' % threads[0].sent)
    print('Messages recvd: %d' % threads[1].valid)
    print('Messages invalid: %d' % threads[1].invalid)


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
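One caveat in main() above: threads[0].sent is read right after the stop event is set, while the producer thread may still be mid-increment or flushing. A variant that joins the thread before reading gives a stable count; a sketch using the classes above (broker address, topic, size, and duration are illustrative):

# Join-before-read variant of the shutdown in main() (illustrative sketch).
producer_stop = threading.Event()
p = Producer(['localhost:9092'], 'kafka-python-benchmark-load-example',
             producer_stop, 1024)
p.start()
time.sleep(3)
producer_stop.set()
p.join()  # wait for the last send, flush() and close()
print('Messages sent: %d' % p.sent)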
@@ -0,0 +1,153 @@
#!/usr/bin/env python
# Adapted from https://github.com/mrafayaleem/kafka-jython

from __future__ import absolute_import, print_function

import argparse
import pprint
import sys
import threading
import time
import traceback

from kafka.vendor.six.moves import range

from kafka import KafkaProducer


class ProducerPerformance(object):
    @staticmethod
    def run(args):
        try:
            props = {}
            for prop in args.producer_config:
                k, v = prop.split('=')
                try:
                    v = int(v)
                except ValueError:
                    pass
                if v == 'None':
                    v = None
                elif v == 'False':
                    v = False
                elif v == 'True':
                    v = True
                props[k] = v

            print('Initializing producer...')
            props['bootstrap_servers'] = args.bootstrap_servers
            record = bytes(bytearray(args.record_size))
            props['metrics_sample_window_ms'] = args.stats_interval * 1000

            producer = KafkaProducer(**props)
            for k, v in props.items():
                print('---> {0}={1}'.format(k, v))
            print('---> send {0} byte records'.format(args.record_size))
            print('---> report stats every {0} secs'.format(args.stats_interval))
            print('---> raw metrics? {0}'.format(args.raw_metrics))
            timer_stop = threading.Event()
            timer = StatsReporter(args.stats_interval, producer,
                                  event=timer_stop,
                                  raw_metrics=args.raw_metrics)
            timer.start()
            print('-> OK!')
            print()

            def _benchmark():
                results = []
                for i in range(args.num_records):
                    results.append(producer.send(topic=args.topic, value=record))
                print("Send complete...")
                producer.flush()
                producer.close()
                count_success, count_failure = 0, 0
                for r in results:
                    if r.succeeded():
                        count_success += 1
                    elif r.failed():
                        count_failure += 1
                    else:
                        raise ValueError(r)
                print("%d succeeded, %d failed" % (count_success, count_failure))

            start_time = time.time()
            _benchmark()
            end_time = time.time()
            timer_stop.set()
            timer.join()
            print('Execution time:', end_time - start_time, 'secs')

        except Exception:
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
            sys.exit(1)


class StatsReporter(threading.Thread):
    def __init__(self, interval, producer, event=None, raw_metrics=False):
        super(StatsReporter, self).__init__()
        self.interval = interval
        self.producer = producer
        self.event = event
        self.raw_metrics = raw_metrics

    def print_stats(self):
        metrics = self.producer.metrics()
        if not metrics:
            return
        if self.raw_metrics:
            pprint.pprint(metrics)
        else:
            print('{record-send-rate} records/sec ({byte-rate} B/sec),'
                  ' {request-latency-avg} latency,'
                  ' {record-size-avg} record size,'
                  ' {batch-size-avg} batch size,'
                  ' {records-per-request-avg} records/req'
                  .format(**metrics['producer-metrics']))

    def print_final(self):
        self.print_stats()

    def run(self):
        while self.event and not self.event.wait(self.interval):
            self.print_stats()
        else:
            self.print_final()


def get_args_parser():
    parser = argparse.ArgumentParser(
        description='This tool is used to verify the producer performance.')

    parser.add_argument(
        '--bootstrap-servers', type=str, nargs='+', default=(),
        help='host:port for cluster bootstrap servers')
    parser.add_argument(
        '--topic', type=str,
        help='Topic name for test (default: kafka-python-benchmark-test)',
        default='kafka-python-benchmark-test')
    parser.add_argument(
        '--num-records', type=int,
        help='number of messages to produce (default: 1000000)',
        default=1000000)
    parser.add_argument(
        '--record-size', type=int,
        help='message size in bytes (default: 100)',
        default=100)
    parser.add_argument(
        '--producer-config', type=str, nargs='+', default=(),
        help='kafka producer related configuration properties like '
             'bootstrap_servers, client_id etc.')
    parser.add_argument(
        '--stats-interval', type=int,
        help='Interval in seconds for stats reporting to console (default: 5)',
        default=5)
    parser.add_argument(
        '--raw-metrics', action='store_true',
        help='Enable this flag to print full metrics dict on each interval')
    return parser


if __name__ == '__main__':
    args = get_args_parser().parse_args()
    ProducerPerformance.run(args)
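The _benchmark closure above fires all sends asynchronously and only inspects the futures after flush(). For a single message, the same future can be resolved synchronously with get(), which kafka-python producer futures support; a sketch (the broker address and payload are illustrative):

# Resolve one send future synchronously instead of batching them up.
producer = KafkaProducer(bootstrap_servers='localhost:9092')
future = producer.send('kafka-python-benchmark-test', b'\x00' * 100)
metadata = future.get(timeout=10)  # raises on delivery failure
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()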
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
from __future__ import print_function
import hashlib
import itertools
import os
import random

import pyperf

from kafka.record.memory_records import MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

# With the values above, a v1 record is 100 bytes, so 10,000 bytes for 100 messages
MESSAGES_PER_BATCH = 100


def random_bytes(length):
    buffer = bytearray(length)
    for i in range(length):
        buffer[i] = random.randint(0, 255)
    return bytes(buffer)


def prepare():
    return iter(itertools.cycle([
        (random_bytes(KEY_SIZE),
         random_bytes(VALUE_SIZE),
         random.randint(*TIMESTAMP_RANGE))
        for _ in range(int(MESSAGES_PER_BATCH * 1.94))
    ]))


def finalize(results):
    # Just some strange code to make sure PyPy does execute the main code
    # properly, without optimizing it away
    hash_val = hashlib.md5()
    for buf in results:
        hash_val.update(buf)
    print(hash_val, file=open(os.devnull, "w"))


def func(loops, magic):
    # The JIT can optimize out the whole function if the result is the same
    # each time, so we need some randomized input data
    precomputed_samples = prepare()
    results = []

    # Main benchmark code.
    t0 = pyperf.perf_counter()
    for _ in range(loops):
        batch = MemoryRecordsBuilder(
            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
        for _ in range(MESSAGES_PER_BATCH):
            key, value, timestamp = next(precomputed_samples)
            size = batch.append(
                timestamp=timestamp, key=key, value=value)
            assert size
        batch.close()
        results.append(batch.buffer())

    res = pyperf.perf_counter() - t0

    finalize(results)

    return res


if __name__ == '__main__':
    runner = pyperf.Runner()
    runner.bench_time_func('batch_append_v0', func, 0)
    runner.bench_time_func('batch_append_v1', func, 1)
    runner.bench_time_func('batch_append_v2', func, 2)
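One iteration of the timed loop can be run standalone to see how large a 100-message batch actually is for a given magic value; a sketch reusing the constants and builder API from the file above (run in its namespace):

# Build a single v2 batch outside the timed loop and inspect its size.
samples = prepare()
builder = MemoryRecordsBuilder(2, batch_size=DEFAULT_BATCH_SIZE,
                               compression_type=0)
for _ in range(MESSAGES_PER_BATCH):
    key, value, timestamp = next(samples)
    builder.append(timestamp=timestamp, key=key, value=value)
builder.close()
print('one batch:', len(builder.buffer()), 'bytes')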
@@ -0,0 +1,83 @@
#!/usr/bin/env python
from __future__ import print_function
import hashlib
import itertools
import os
import random

import pyperf

from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

BATCH_SAMPLES = 5
MESSAGES_PER_BATCH = 100


def random_bytes(length):
    buffer = bytearray(length)
    for i in range(length):
        buffer[i] = random.randint(0, 255)
    return bytes(buffer)


def prepare(magic):
    samples = []
    for _ in range(BATCH_SAMPLES):
        batch = MemoryRecordsBuilder(
            magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0)
        for _ in range(MESSAGES_PER_BATCH):
            size = batch.append(
                random.randint(*TIMESTAMP_RANGE),
                random_bytes(KEY_SIZE),
                random_bytes(VALUE_SIZE),
                headers=[])
            assert size
        batch.close()
        samples.append(bytes(batch.buffer()))

    return iter(itertools.cycle(samples))


def finalize(results):
    # Just some strange code to make sure PyPy does execute the code above
    # properly, without optimizing it away
    hash_val = hashlib.md5()
    for buf in results:
        hash_val.update(buf)
    print(hash_val, file=open(os.devnull, "w"))


def func(loops, magic):
    # The JIT can optimize out the whole function if the result is the same
    # each time, so we need some randomized input data
    precomputed_samples = prepare(magic)
    results = []

    # Main benchmark code.
    batch_data = next(precomputed_samples)
    t0 = pyperf.perf_counter()
    for _ in range(loops):
        records = MemoryRecords(batch_data)
        while records.has_next():
            batch = records.next_batch()
            batch.validate_crc()
            for record in batch:
                results.append(record.value)

    res = pyperf.perf_counter() - t0
    finalize(results)

    return res


if __name__ == '__main__':
    runner = pyperf.Runner()
    runner.bench_time_func('batch_read_v0', func, 0)
    runner.bench_time_func('batch_read_v1', func, 1)
    runner.bench_time_func('batch_read_v2', func, 2)
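Since this file imports both the builder and the reader, a quick roundtrip check (compose one batch, read it back) mirrors prepare() plus the timed loop and verifies that record sizes survive; a sketch run in the module's namespace:

# Roundtrip sanity check: one composed batch should yield exactly
# MESSAGES_PER_BATCH records of VALUE_SIZE bytes each.
batch_data = next(prepare(2))
records = MemoryRecords(batch_data)
count = 0
while records.has_next():
    batch = records.next_batch()
    batch.validate_crc()
    for record in batch:
        assert len(record.value) == VALUE_SIZE
        count += 1
assert count == MESSAGES_PER_BATCH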
@@ -0,0 +1,434 @@
#!/usr/bin/env python
from __future__ import print_function
import pyperf
from kafka.vendor import six


test_data = [
    (b"\x00", 0),
    (b"\x01", -1),
    (b"\x02", 1),
    (b"\x7E", 63),
    (b"\x7F", -64),
    (b"\x80\x01", 64),
    (b"\x81\x01", -65),
    (b"\xFE\x7F", 8191),
    (b"\xFF\x7F", -8192),
    (b"\x80\x80\x01", 8192),
    (b"\x81\x80\x01", -8193),
    (b"\xFE\xFF\x7F", 1048575),
    (b"\xFF\xFF\x7F", -1048576),
    (b"\x80\x80\x80\x01", 1048576),
    (b"\x81\x80\x80\x01", -1048577),
    (b"\xFE\xFF\xFF\x7F", 134217727),
    (b"\xFF\xFF\xFF\x7F", -134217728),
    (b"\x80\x80\x80\x80\x01", 134217728),
    (b"\x81\x80\x80\x80\x01", -134217729),
    (b"\xFE\xFF\xFF\xFF\x7F", 17179869183),
    (b"\xFF\xFF\xFF\xFF\x7F", -17179869184),
    (b"\x80\x80\x80\x80\x80\x01", 17179869184),
    (b"\x81\x80\x80\x80\x80\x01", -17179869185),
    (b"\xFE\xFF\xFF\xFF\xFF\x7F", 2199023255551),
    (b"\xFF\xFF\xFF\xFF\xFF\x7F", -2199023255552),
    (b"\x80\x80\x80\x80\x80\x80\x01", 2199023255552),
    (b"\x81\x80\x80\x80\x80\x80\x01", -2199023255553),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\x7F", 281474976710655),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -281474976710656),
    (b"\x80\x80\x80\x80\x80\x80\x80\x01", 281474976710656),
    (b"\x81\x80\x80\x80\x80\x80\x80\x01", -281474976710657),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 36028797018963967),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -36028797018963968),
    (b"\x80\x80\x80\x80\x80\x80\x80\x80\x01", 36028797018963968),
    (b"\x81\x80\x80\x80\x80\x80\x80\x80\x01", -36028797018963969),
    (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 4611686018427387903),
    (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -4611686018427387904),
    (b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 4611686018427387904),
    (b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x01", -4611686018427387905),
]
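The (encoded, decoded) pairs above are zigzag varints: the sign bit is moved into the lowest bit so small negative numbers still encode as short byte sequences. A minimal demonstration of the zigzag step used by every encoder below (the helper name is illustrative, not part of this file):

def zigzag64(n):
    # 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... (64-bit signed variant)
    return (n << 1) ^ (n >> 63)

# Matches the single-byte entries of test_data above:
assert zigzag64(0) == 0x00
assert zigzag64(-1) == 0x01
assert zigzag64(63) == 0x7E
assert zigzag64(-64) == 0x7F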
BENCH_VALUES_ENC = [
    60,  # 1 byte
    -8192,  # 2 bytes
    1048575,  # 3 bytes
    134217727,  # 4 bytes
    -17179869184,  # 5 bytes
    2199023255551,  # 6 bytes
]

BENCH_VALUES_DEC = [
    b"\x7E",  # 1 byte
    b"\xFF\x7F",  # 2 bytes
    b"\xFE\xFF\x7F",  # 3 bytes
    b"\xFF\xFF\xFF\x7F",  # 4 bytes
    b"\x80\x80\x80\x80\x01",  # 5 bytes
    b"\xFE\xFF\xFF\xFF\xFF\x7F",  # 6 bytes
]
BENCH_VALUES_DEC = list(map(bytearray, BENCH_VALUES_DEC))


def _assert_valid_enc(enc_func):
    for encoded, decoded in test_data:
        assert enc_func(decoded) == encoded, decoded


def _assert_valid_dec(dec_func):
    for encoded, decoded in test_data:
        res, pos = dec_func(bytearray(encoded))
        assert res == decoded, (decoded, res)
        assert pos == len(encoded), (decoded, pos)


def _assert_valid_size(size_func):
    for encoded, decoded in test_data:
        assert size_func(decoded) == len(encoded), decoded


def encode_varint_1(num):
    """ Encode an integer to a varint representation. See
    https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
    on how those can be produced.

    Arguments:
        num (int): Value to encode

    Returns:
        bytearray: Encoded representation of integer with length from 1 to 10
            bytes
    """
    # Shift sign to the end of number
    num = (num << 1) ^ (num >> 63)
    # Max 10 bytes. We assert those are allocated
    buf = bytearray(10)

    for i in range(10):
        # 7 lowest bits from the number and set 8th if we still have pending
        # bits left to encode
        buf[i] = num & 0x7f | (0x80 if num > 0x7f else 0)
        num = num >> 7
        if num == 0:
            break
    else:
        # Max size of an encoded varint is 10 bytes for unsigned values
        raise ValueError("Out of int64 range")
    return buf[:i + 1]
def encode_varint_2(value, int2byte=six.int2byte):
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    res = b""
    while value:
        res += int2byte(0x80 | bits)
        bits = value & 0x7f
        value >>= 7
    return res + int2byte(bits)


def encode_varint_3(value, buf):
    append = buf.append
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    while value:
        append(0x80 | bits)
        bits = value & 0x7f
        value >>= 7
    append(bits)
    return value


def encode_varint_4(value, int2byte=six.int2byte):
    value = (value << 1) ^ (value >> 63)

    if value <= 0x7f:  # 1 byte
        return int2byte(value)
    if value <= 0x3fff:  # 2 bytes
        return int2byte(0x80 | (value & 0x7f)) + int2byte(value >> 7)
    if value <= 0x1fffff:  # 3 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(value >> 14)
    if value <= 0xfffffff:  # 4 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(0x80 | ((value >> 14) & 0x7f)) + \
            int2byte(value >> 21)
    if value <= 0x7ffffffff:  # 5 bytes
        return int2byte(0x80 | (value & 0x7f)) + \
            int2byte(0x80 | ((value >> 7) & 0x7f)) + \
            int2byte(0x80 | ((value >> 14) & 0x7f)) + \
            int2byte(0x80 | ((value >> 21) & 0x7f)) + \
            int2byte(value >> 28)
    else:
        # Fall back to the general algorithm
        bits = value & 0x7f
        value >>= 7
        res = b""
        while value:
            res += int2byte(0x80 | bits)
            bits = value & 0x7f
            value >>= 7
        return res + int2byte(bits)


def encode_varint_5(value, buf, pos=0):
    value = (value << 1) ^ (value >> 63)

    bits = value & 0x7f
    value >>= 7
    while value:
        buf[pos] = 0x80 | bits
        bits = value & 0x7f
        value >>= 7
        pos += 1
    buf[pos] = bits
    return pos + 1


def encode_varint_6(value, buf):
    append = buf.append
    value = (value << 1) ^ (value >> 63)

    if value <= 0x7f:  # 1 byte
        append(value)
        return 1
    if value <= 0x3fff:  # 2 bytes
        append(0x80 | (value & 0x7f))
        append(value >> 7)
        return 2
    if value <= 0x1fffff:  # 3 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(value >> 14)
        return 3
    if value <= 0xfffffff:  # 4 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(0x80 | ((value >> 14) & 0x7f))
        append(value >> 21)
        return 4
    if value <= 0x7ffffffff:  # 5 bytes
        append(0x80 | (value & 0x7f))
        append(0x80 | ((value >> 7) & 0x7f))
        append(0x80 | ((value >> 14) & 0x7f))
        append(0x80 | ((value >> 21) & 0x7f))
        append(value >> 28)
        return 5
    else:
        # Fall back to the general algorithm
        bits = value & 0x7f
        value >>= 7
        i = 0
        while value:
            append(0x80 | bits)
            bits = value & 0x7f
            value >>= 7
            i += 1
        append(bits)
        # The loop appends i bytes plus the final one, so i + 1 in total
        return i + 1


def size_of_varint_1(value):
    """ Number of bytes needed to encode an integer in variable-length format.
    """
    value = (value << 1) ^ (value >> 63)
    res = 0
    while True:
        res += 1
        value = value >> 7
        if value == 0:
            break
    return res


def size_of_varint_2(value):
    """ Number of bytes needed to encode an integer in variable-length format.
    """
    value = (value << 1) ^ (value >> 63)
    if value <= 0x7f:
        return 1
    if value <= 0x3fff:
        return 2
    if value <= 0x1fffff:
        return 3
    if value <= 0xfffffff:
        return 4
    if value <= 0x7ffffffff:
        return 5
    if value <= 0x3ffffffffff:
        return 6
    if value <= 0x1ffffffffffff:
        return 7
    if value <= 0xffffffffffffff:
        return 8
    if value <= 0x7fffffffffffffff:
        return 9
    return 10


if six.PY3:
    def _read_byte(memview, pos):
        """ Read a byte from memoryview as an integer

        Raises:
            IndexError: if position is out of bounds
        """
        return memview[pos]
else:
    def _read_byte(memview, pos):
        """ Read a byte from memoryview as an integer

        Raises:
            IndexError: if position is out of bounds
        """
        return ord(memview[pos])


def decode_varint_1(buffer, pos=0):
    """ Decode an integer from a varint representation. See
    https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
    on how those can be produced.

    Arguments:
        buffer (bytes-like): any object acceptable by ``memoryview``
        pos (int): optional position to read from

    Returns:
        (int, int): Decoded int value and next read position
    """
    value = 0
    shift = 0
    memview = memoryview(buffer)
    for i in range(pos, pos + 10):
        try:
            byte = _read_byte(memview, i)
        except IndexError:
            raise ValueError("End of byte stream")
        if byte & 0x80 != 0:
            value |= (byte & 0x7f) << shift
            shift += 7
        else:
            value |= byte << shift
            break
    else:
        # Max size of an encoded varint is 10 bytes for unsigned values
        raise ValueError("Out of int64 range")
    # Normalize sign
    return (value >> 1) ^ -(value & 1), i + 1
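A worked example of decode_varint_1 on the two-byte pair (b"\xFE\x7F", 8191) from test_data: the first byte 0xFE has the continuation bit set and contributes its low bits 0x7E; the second byte 0x7F has no continuation bit and contributes 0x7F << 7, giving 0x3FFE; the final zigzag step (0x3FFE >> 1) ^ -(0x3FFE & 1) recovers 8191.

# Verifies the walkthrough above; the returned position is 2 (both bytes read).
value, pos = decode_varint_1(bytearray(b"\xFE\x7F"))
assert (value, pos) == (8191, 2)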
def decode_varint_2(buffer, pos=0):
    result = 0
    shift = 0
    while True:
        b = buffer[pos]
        result |= ((b & 0x7f) << shift)
        pos += 1
        if not (b & 0x80):
            return ((result >> 1) ^ -(result & 1), pos)
        shift += 7
        if shift >= 64:
            raise ValueError("Out of int64 range")


def decode_varint_3(buffer, pos=0):
    result = buffer[pos]
    if not (result & 0x81):
        return (result >> 1), pos + 1
    if not (result & 0x80):
        return (result >> 1) ^ (~0), pos + 1

    result &= 0x7f
    pos += 1
    shift = 7
    while True:
        b = buffer[pos]
        result |= ((b & 0x7f) << shift)
        pos += 1
        if not (b & 0x80):
            return ((result >> 1) ^ -(result & 1), pos)
        shift += 7
        if shift >= 64:
            raise ValueError("Out of int64 range")


if __name__ == '__main__':
    _assert_valid_enc(encode_varint_1)
    _assert_valid_enc(encode_varint_2)

    for encoded, decoded in test_data:
        res = bytearray()
        encode_varint_3(decoded, res)
        assert res == encoded

    _assert_valid_enc(encode_varint_4)

    # import dis
    # dis.dis(encode_varint_4)

    for encoded, decoded in test_data:
        res = bytearray(10)
        written = encode_varint_5(decoded, res)
        assert res[:written] == encoded

    for encoded, decoded in test_data:
        res = bytearray()
        encode_varint_6(decoded, res)
        assert res == encoded

    _assert_valid_size(size_of_varint_1)
    _assert_valid_size(size_of_varint_2)
    _assert_valid_dec(decode_varint_1)
    _assert_valid_dec(decode_varint_2)
    _assert_valid_dec(decode_varint_3)

    # import dis
    # dis.dis(decode_varint_3)

    runner = pyperf.Runner()
    # Encode algorithms returning a bytes result
    for bench_func in [
            encode_varint_1,
            encode_varint_2,
            encode_varint_4]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)

    # Encode algorithms writing to the buffer
    for bench_func in [
            encode_varint_3,
            encode_varint_5,
            encode_varint_6]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            fname = bench_func.__name__
            runner.timeit(
                '{}_{}byte'.format(fname, i + 1),
                stmt="{}({}, buffer)".format(fname, value),
                setup="from __main__ import {}; buffer = bytearray(10)".format(
                    fname)
            )

    # Size algorithms
    for bench_func in [
            size_of_varint_1,
            size_of_varint_2]:
        for i, value in enumerate(BENCH_VALUES_ENC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)

    # Decode algorithms
    for bench_func in [
            decode_varint_1,
            decode_varint_2,
            decode_varint_3]:
        for i, value in enumerate(BENCH_VALUES_DEC):
            runner.bench_func(
                '{}_{}byte'.format(bench_func.__name__, i + 1),
                bench_func, value)