pykafka: 生产者和消费者的实现以及模拟(produce/consume)

pykafka生产者模型

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "Victor"
# Date: 2020/6/18


import sys
import json
from pykafka import KafkaClient
# Seed brokers of the Kafka cluster, one "host:port" entry per broker.
KAFKA_HOSTS = [
    "bjxg-bd-slave01:9092",
    "bjxg-bd-slave02:9092",
    "bjxg-bd-slave03:9092",
    "bjxg-bd-slave04:9092",
    "bjxg-bd-slave05:9092",
]


def join_hosts(hosts):
    """Return *hosts* as a single comma-separated "host:port" string.

    pykafka documents ``KafkaClient(hosts=...)`` as a comma-separated
    string. A string is returned unchanged; any other iterable of
    "host:port" entries is joined with commas.
    """
    if isinstance(hosts, str):
        return hosts
    return ",".join(hosts)


def encode_record(line):
    """Turn one tab-separated input line into a Kafka message payload.

    The first two tab-separated fields are used as ``user_id`` and
    ``device_id``; any further fields are ignored.

    Returns:
        The UTF-8 encoded JSON document as ``bytes``, or ``None`` when the
        line has fewer than two fields (for example a blank line).
    """
    # Strip the line terminator only ('\n' or '\r\n'); the fields themselves
    # are left untouched.
    items = line.rstrip('\r\n').split('\t')
    if len(items) < 2:
        return None
    user_id, device_id = items[0], items[1]
    # produce() needs bytes on Python 3, so encode the JSON text here.
    return json.dumps({'user_id': user_id, 'device_id': device_id}).encode('utf-8')


if __name__ == '__main__':

    client = KafkaClient(hosts=join_hosts(KAFKA_HOSTS))
    topic = client.topics['test_topic']
    print(topic)

    written_msgs = 0
    skipped_lines = 0
    # The context manager stops the producer when the block exits.
    with topic.get_producer() as producer:
        # Input: one record per line on stdin, fields separated by tabs.
        for line in sys.stdin:
            payload = encode_record(line)
            if payload is None:
                # Malformed line: skip it instead of aborting the whole run.
                skipped_lines += 1
                continue

            producer.produce(payload)
            written_msgs += 1

            # Progress report every 1000 messages.
            if written_msgs % 1000 == 0:
                print("written_msgs: %d" % written_msgs)
    print("written_msgs: %d" % written_msgs)
    if skipped_lines:
        sys.stderr.write("skipped_lines: %d\n" % skipped_lines)

pykafka消费者模型

import json
from pykafka import KafkaClient

# Seed brokers of the Kafka cluster, one "host:port" entry per broker.
KAFKA_HOSTS = [
    "bjxg-bd-slave01:9092",
    "bjxg-bd-slave02:9092",
    "bjxg-bd-slave03:9092",
    "bjxg-bd-slave04:9092",
    "bjxg-bd-slave05:9092",
]
TOPIC = "ad-behavior.xxxxxxx-v2"


def join_hosts(hosts):
    """Return *hosts* as a single comma-separated "host:port" string.

    pykafka documents ``KafkaClient(hosts=...)`` as a comma-separated
    string. A string is returned unchanged; any other iterable of
    "host:port" entries is joined with commas.
    """
    if isinstance(hosts, str):
        return hosts
    return ",".join(hosts)


def consume(consumer):
    """Print the offset and value of every message yielded by *consumer*.

    ``None`` entries are skipped. ``consumer.stop()`` is always called when
    iteration ends, including when it is cut short by an exception such as
    ``KeyboardInterrupt``.

    Returns:
        The number of messages printed.
    """
    printed = 0
    try:
        for message in consumer:
            if message is not None:
                print(message.offset, message.value)
                printed += 1
    finally:
        consumer.stop()
    return printed


if __name__ == '__main__':

    # pykafka consumer script. Original author's note: list every broker of
    # the cluster in KAFKA_HOSTS, otherwise other problems can occur.
    client = KafkaClient(hosts=join_hosts(KAFKA_HOSTS))
    all_topics = client.topics
    print("all the topics: ", all_topics)
    topic = client.topics[TOPIC]

    # Original author's notes: keep the consumer group name stable, because
    # changing it can trigger Kafka rebalancing problems. With
    # auto_commit_enable=True the stored offset is updated automatically as
    # messages are fetched, which avoids consuming the same message twice.
    consume(topic.get_balanced_consumer('MY_GROUP1', auto_commit_enable=True, auto_commit_interval_ms=3000))

    # Original author's note: this style seems to be deprecated.
    # NOTE(review): no consumer_timeout_ms is passed above, so the balanced
    # consumer presumably blocks forever and this part is never reached;
    # confirm before relying on it.
    consume(topic.get_simple_consumer(consumer_group="mygroup", auto_commit_enable=True))

不用任何第三方模块（仅用标准库 queue 和 threading）简单模拟生产者和消费者


import time, random
import queue,threading
 
# Bounded FIFO queue shared by the Producer and Consumer functions; it holds
# at most 5 pending items.
q = queue.Queue(5)
 
def Producer(name):
    """Put the integers 0 through 9 on the shared queue ``q``, one per second.

    Args:
        name: Label used in the progress line printed after each put.
    """
    for count in range(10):
        q.put(count)
        print(f'Producer {name} has produced {count}...')
        # Pace production at one item per second.
        time.sleep(1)

 
def Consumer(name):
    """Poll the shared queue ``q`` ten times, one second apart.

    Each poll either removes one item and prints it, or prints a notice that
    nothing was available. Ten polls are made regardless of how many items
    were actually received.

    Args:
        name: Label used in the line printed for each consumed item.
    """
    for _ in range(10):
        try:
            # get_nowait() tests and removes in one step. A separate empty()
            # check followed by get() could block forever if another consumer
            # took the item in between.
            data = q.get_nowait()
        except queue.Empty:
            print("-----no producer anymore----")
        else:
            print(f'Consumer {name} has consumed {data} ...')
        time.sleep(1)
 
# Run one producer (labelled 'P') and one consumer (labelled 'C'), each on its
# own thread; the producer thread is started first.
p1, c1 = (
    threading.Thread(target=Producer, kwargs={'name': 'P'}),
    threading.Thread(target=Consumer, kwargs={'name': 'C'}),
)
p1.start()
c1.start()

posted @ 2020-06-18 16:02  Adamanter  阅读(624)  评论(0)    收藏  举报