python kafka操作

 

来源于  https://blog.csdn.net/muyimo/article/details/84991405

 

 1 #!/usr/bin/env python
 2 import threading, logging, time
 3 import multiprocessing
 4  
 5 from kafka import KafkaConsumer, KafkaProducer
 6  
 7 BOOTSTRAP_SERVERS='127.0.0.1:9092'
 8  
 9  
class Producer(threading.Thread):
    """Background thread that publishes two test messages to 'my-topic' every second."""

    def __init__(self):
        super().__init__()
        # Cooperative shutdown flag; set by stop(), polled by run().
        self.stop_event = threading.Event()

    def stop(self):
        """Ask the publishing loop to exit at its next iteration."""
        self.stop_event.set()

    def run(self):
        kafka_producer = KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVERS)

        # Publish until stop() is called, pausing one second per round.
        while not self.stop_event.is_set():
            kafka_producer.send('my-topic', b"test")
            # Payload starts with 0xC2 — deliberately not valid UTF-8.
            kafka_producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(1)

        kafka_producer.close()
27  
# Read data from Kafka.
class Consumer(multiprocessing.Process):
    """Worker process that prints every message arriving on 'my-topic'."""

    def __init__(self):
        super().__init__()
        # Cooperative shutdown flag shared across processes.
        self.stop_event = multiprocessing.Event()

    def stop(self):
        """Ask the consuming loop to exit."""
        self.stop_event.set()

    def run(self):
        kafka_consumer = KafkaConsumer(bootstrap_servers=BOOTSTRAP_SERVERS,
                                       auto_offset_reset='earliest',
                                       consumer_timeout_ms=1000)
        # Subscribe to a single topic.
        kafka_consumer.subscribe(['my-topic'])

        # The inner for-loop raises StopIteration after consumer_timeout_ms of
        # silence, so the outer while re-checks the stop flag about once a second.
        while not self.stop_event.is_set():
            for record in kafka_consumer:
                print(record)
                if self.stop_event.is_set():
                    break

        kafka_consumer.close()
51         
52         
def main():
    """Start the Kafka workers, let them run for an hour, then stop and join them."""
    # Only the consumer is enabled in this demo; uncomment Producer() to publish too.
    workers = [
        #Producer(),
        Consumer()
    ]

    for worker in workers:
        worker.start()

    time.sleep(3600)  # run for one hour

    # Signal every worker first, then wait for each to finish.
    for worker in workers:
        worker.stop()

    for worker in workers:
        worker.join()
69         
70         
if __name__ == "__main__":
    # Fix: %(msecs)03d zero-pads the millisecond field to three digits.
    # The original %(msecs)s rendered the raw float (e.g. "123.456789"),
    # producing malformed timestamps like "12:00:00.123.456789".
    logging.basicConfig(
        format='%(asctime)s.%(msecs)03d:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
        level=logging.INFO
        )
    main()

KafkaConsumer

#!/usr/bin/env python
#coding:gbk

# Kafka usage: KafkaConsumer examples.

# Fix: the original "import kafka import KafkaConsumer" was a SyntaxError.
from kafka import KafkaConsumer
import json  # needed by the value_deserializer example below

# Consume the newest data in Kafka and auto-commit offsets
# (the message offset positions).
consumer = KafkaConsumer('my-topic',
                         group_id='my-group',
                         bootstrap_servers=['localhost:9092'])
# Fix: the original "from message in consumer:" was a SyntaxError.
for message in consumer:
    # NOTE: message.key and message.value are raw bytes and must be decoded,
    # e.g. message.value.decode('utf-8').
    # Fix: the original "%s (..." was a SyntaxError; the % operator applies
    # the tuple to the format string.
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))

# The snippets below show a few commonly used KafkaConsumer parameters.

# 1: Read the earliest data Kafka has retained. Kafka keeps a few days of
#    history whether or not it has been consumed; auto_offset_reset='earliest'
#    starts from that oldest data, and enable_auto_commit=False stops the
#    consumer from auto-committing consumed offsets.
KafkaConsumer(auto_offset_reset='earliest', enable_auto_commit=False)

# 2: Consume JSON-formatted messages:
KafkaConsumer(value_deserializer=lambda m: json.loads(m.decode('ascii')))

# 3: Set a timeout for when Kafka has no consumable data:
KafkaConsumer(consumer_timeout_ms=1000)  # exit automatically if no data arrives within 1 second

# To consume several topics of one group at the same time — e.g. three topics
# named awesome1, awesome2 and awesome3 — subscribe by pattern:
consumer = KafkaConsumer()
consumer.subscribe(pattern='^awesome.*')

 

posted @ 2020-04-16 11:50  苦行者的刀  阅读(459)  评论(0)    收藏  举报