pykafka生产者模型
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "Victor"
# Date: 2020/6/18
import sys
import json
from pykafka import KafkaClient
KAFKA_HOSTS = ["bjxg-bd-slave01:9092", "bjxg-bd-slave02:9092", "bjxg-bd-slave03:9092", "bjxg-bd-slave04:9092", "bjxg-bd-slave05:9092"]


def build_message(line):
    """Convert one tab-separated input line into a Kafka message payload.

    The first two tab-separated fields are taken as ``user_id`` and
    ``device_id``; any further fields are ignored.

    Args:
        line: One raw line of input, with or without its line terminator.

    Returns:
        The UTF-8 encoded JSON object ``{"user_id": ..., "device_id": ...}``
        as ``bytes``, or ``None`` when the line has fewer than two fields
        (for example a blank line).
    """
    items = line.rstrip('\r\n').split('\t')
    if len(items) < 2:
        return None
    payload = {'user_id': items[0], 'device_id': items[1]}
    # pykafka's produce() only accepts bytes, so encode the JSON text here.
    return json.dumps(payload).encode('utf-8')


if __name__ == '__main__':
    # pykafka documents `hosts` as a single comma-separated string,
    # so the host list is joined before being handed to the client.
    client = KafkaClient(hosts=",".join(KAFKA_HOSTS))
    topics = client.topics
    topic = topics['test_topic']
    print(topic)
    written_msgs = 0
    with topic.get_producer() as producer:
        for line in sys.stdin:
            # Input fields are separated by tabs.
            message = build_message(line)
            if message is None:
                # Blank or malformed line: skip it instead of crashing
                # the whole run with an IndexError.
                continue
            producer.produce(message)
            written_msgs += 1
            if written_msgs % 1000 == 0:
                print("written_msgs: %d" % written_msgs)
    print("written_msgs: %d" % written_msgs)
消费者模型
import json
from pykafka import KafkaClient
KAFKA_HOSTS = ["bjxg-bd-slave01:9092", "bjxg-bd-slave02:9092", "bjxg-bd-slave03:9092", "bjxg-bd-slave04:9092", "bjxg-bd-slave05:9092"]
TOPIC = "ad-behavior.xxxxxxx-v2"


def print_messages(consumer):
    """Print the offset and value of every message yielded by a consumer.

    ``None`` entries yielded by the consumer are skipped. The consumer is
    always stopped afterwards, including when iteration is interrupted by
    an exception, so that its resources are released.

    Args:
        consumer: An iterable of messages exposing ``offset`` and ``value``
            attributes, with a ``stop()`` method (e.g. a pykafka consumer).
    """
    try:
        for message in consumer:
            if message is not None:
                print(message.offset, message.value)
    finally:
        consumer.stop()


if __name__ == '__main__':
    # pykafka consumer example. Original author's note: pass the addresses
    # of all machines in the cluster, otherwise other problems may occur.
    # pykafka documents `hosts` as a single comma-separated string,
    # so the host list is joined before being handed to the client.
    client = KafkaClient(hosts=",".join(KAFKA_HOSTS))
    all_topics = client.topics
    print("all the topics: ", all_topics)
    topic = client.topics[TOPIC]
    # Get the consumer. Original author's note: do not change the group name
    # casually, otherwise it can easily trigger Kafka rebalancing problems.
    # With auto_commit_enable=True the stored offset is committed
    # automatically (here every 3000 ms), which avoids re-consuming messages.
    consumer = topic.get_balanced_consumer('MY_GROUP1', auto_commit_enable=True, auto_commit_interval_ms=3000)
    print_messages(consumer)
    # Original author's note: this style appears to be deprecated.
    # NOTE(review): no consumer_timeout_ms is set above, so the first loop
    # presumably blocks waiting for new messages and this second consumer is
    # only reached if that iteration ends - confirm the intended usage.
    consumer = topic.get_simple_consumer(consumer_group="mygroup", auto_commit_enable=True)
    print_messages(consumer)
不用任何模块简单模拟生产者和消费者
import time, random
import queue,threading
# Shared FIFO buffer between the producer and consumer threads; it holds at
# most 5 items, so put() blocks while the buffer is full.
q = queue.Queue(maxsize=5)
def Producer(name):
    """Put the integers 0..9 on the shared queue, one per second.

    Each item is announced on stdout after it has been queued. ``q.put``
    blocks while the queue is full.

    Args:
        name: Label used in the printed progress messages.
    """
    for item in range(10):
        q.put(item)
        print(f'Producer {name} has produced {item}...')
        time.sleep(1)
def Consumer(name):
    """Poll the shared queue ten times, one second apart.

    On each poll, one item is taken and reported if available; otherwise a
    "no producer" notice is printed.

    Args:
        name: Label used in the printed progress messages.
    """
    for _ in range(10):
        try:
            # get_nowait() never blocks. The previous empty()-then-get()
            # sequence could hang forever if another consumer drained the
            # queue between the check and the get.
            data = q.get_nowait()
        except queue.Empty:
            print("-----no producer anymore----")
        else:
            print(f'Consumer {name} has consumed {data} ...')
        time.sleep(1)
# Run one producer and one consumer concurrently; the producer thread is
# started first.
p1 = threading.Thread(target=Producer, args=('P',))
c1 = threading.Thread(target=Consumer, args=('C',))
for worker in (p1, c1):
    worker.start()