Python Kafka 操作示例（基于 kafka-python 库）
来源于 https://blog.csdn.net/muyimo/article/details/84991405
#!/usr/bin/env python
"""Demo of kafka-python: a producer thread and a consumer process.

The Producer publishes two test messages per second to 'my-topic';
the Consumer subscribes to the same topic and prints every record it
receives.  main() runs them for an hour, then signals both to stop.
"""
import threading, logging, time
import multiprocessing

from kafka import KafkaConsumer, KafkaProducer

# Address of the local Kafka broker.
BOOTSTRAP_SERVERS = '127.0.0.1:9092'


class Producer(threading.Thread):
    """Thread that publishes two messages per second until stopped."""

    def __init__(self):
        threading.Thread.__init__(self)
        self.stop_event = threading.Event()

    def stop(self):
        """Ask the run() loop to exit at its next iteration."""
        self.stop_event.set()

    def run(self):
        producer = KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVERS)

        while not self.stop_event.is_set():
            producer.send('my-topic', b"test")
            # Deliberately non-UTF-8 payload: Kafka values are opaque bytes.
            producer.send('my-topic', b"\xc2Hola, mundo!")
            time.sleep(1)

        producer.close()


# Reads data from Kafka.
class Consumer(multiprocessing.Process):
    """Process that consumes 'my-topic' and prints each record until stopped."""

    def __init__(self):
        multiprocessing.Process.__init__(self)
        self.stop_event = multiprocessing.Event()

    def stop(self):
        """Ask the run() loop to exit."""
        self.stop_event.set()

    def run(self):
        # consumer_timeout_ms=1000 makes the inner for-loop give control
        # back every second when the topic is idle, so the stop flag is
        # observed promptly even with no traffic.
        consumer = KafkaConsumer(bootstrap_servers=BOOTSTRAP_SERVERS,
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        # Subscribe to a topic.
        consumer.subscribe(['my-topic'])

        while not self.stop_event.is_set():
            for message in consumer:
                print(message)
                if self.stop_event.is_set():
                    break

        consumer.close()


def main():
    """Start the demo tasks, let them run, then stop and join them."""
    tasks = [
        #Producer(),
        Consumer()
    ]

    for t in tasks:
        t.start()

    time.sleep(3600)

    for task in tasks:
        task.stop()

    for task in tasks:
        task.join()


if __name__ == "__main__":
    logging.basicConfig(
        # Fix: msecs is a float LogRecord attribute; %(msecs)03d renders
        # zero-padded milliseconds, which is what "asctime.msecs" intends
        # (plain %s printed the full float, e.g. "12:00:00.123.456789").
        format='%(asctime)s.%(msecs)03d:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
        level=logging.INFO
    )
    main()
KafkaConsumer
#!/usr/bin/env python
#coding:gbk
"""Usage notes for kafka-python's KafkaConsumer."""
import json  # fix: needed by the value_deserializer example below
from kafka import KafkaConsumer  # fix: was "import kafka import KafkaConsumer" (invalid syntax)

# Consume the newest data in Kafka and auto-commit offsets
# (the message offsets) as records are read.
consumer = KafkaConsumer('my-topic',
                         group_id='my-group',
                         bootstrap_servers=['localhost:9092'])
# Fix: was "from message in consumer" (syntax error).
for message in consumer:
    # NOTE: message.key and message.value are raw bytes and must be
    # decoded by the caller, e.g. message.value.decode('utf-8').
    # Fix: the string was formatted with "%s (...)"; the operator is "%".
    print("%s:%d:%d: key=%s value=%s" % (message.topic,
                                         message.partition,
                                         message.offset,
                                         message.key,
                                         message.value))

# The snippets below show a few commonly used KafkaConsumer parameters.

# 1: Consume the earliest data Kafka has retained.  Kafka keeps several
# days of history whether or not it has been consumed; to read from the
# beginning set auto_offset_reset='earliest'.  The second parameter
# disables automatic offset commits.
KafkaConsumer(auto_offset_reset='earliest',
              enable_auto_commit=False)

# 2: Consume JSON-formatted messages.
KafkaConsumer(value_deserializer=lambda m: json.loads(m.decode('ascii')))

# 3: Set a timeout for when Kafka has no data to consume: if no record
# arrives within 1 second, iteration over the consumer stops.
KafkaConsumer(consumer_timeout_ms=1000)

# To consume several topics of one group at once — e.g. three topics
# named awesome1, awesome2 and awesome3 — subscribe with a pattern:
consumer = KafkaConsumer()
consumer.subscribe(pattern='^awesome.*')
有空常来坐坐
https://www.cnblogs.com/alexgl2008/