Using Python to control bash commands and run them in parallel

Goal:

  1. Read commands from a pipe line by line, each command occupying one line.

  2. Run these commands in parallel, and print their output (stdout and stderr) simultaneously.

Techniques in a nutshell:

  1. Run bash commands by subprocess

  2. Use threads to monitor the outputs.

CODE:

  1 #!/usr/bin/env python
  2 
  3 '''
  4     Read commands from stdin and process it line by line.
  5     
  6     @Author: 
  7     @Date: 2013 - 07 - 27
  8     @Version: 1.0
  9     
 10     @Ref.:
 11     http://code.activestate.com/recipes/577376-simple-way-to-execute-multiple-process-in-parallel/
 12     http://sharats.me/the-ever-useful-and-neat-subprocess-module.html
 13 '''
 14 
 15 import sys,time
 16 from subprocess import Popen,PIPE,STDOUT
 17 from threading import Thread
 18 from Queue import Empty,Queue
 19 
 20 def help():
 21     sys.stderr.write('''
 22     Read commands from stdin, and run them in parallel line by line
 23     
 24     @Author: 
 25     @Version: 1.0
 26     
 27     @Usages:
 28     para1: How many cpus do you want to use.(eg. 10)
 29     
 30     @Note:
 31     1. Read from stdin and output messages to stdout.
 32     2. This program can only run on Linux system, invoke '/bin/bash' to interpretate,
 33         this could make the substitution feature works. 
 34     \n''')
 35     sys.stderr.flush()
 36     sys.stderr.close()
 37     sys.exit(-1)
 38 
 39 class P(object):
 40     cpu_num = 1 #the number of cpus want to use.
 41 
 42 def executeCommands(cmdList):
 43     '''Execute commands from cmd list in parallel.'''
 44     totalTask = len(cmdList)
 45     doneTask = 0
 46 
 47     def checkDone(p):
 48         '''check whether process is done!'''
 49         return p.poll() is not None
 50     def checkSuccess(p):
 51         '''check whether process exited successful'''
 52         return p.returncode == 0
 53     def closeWriter():
 54         '''flush all writer out'''
 55         sys.stdout.flush()
 56         sys.stdout.close()
 57         sys.stderr.flush()
 58         sys.stderr.close()    
 59     
 60     stdout_Queue = Queue()
 61     stderr_Queue = Queue()
 62     #process starts from here!
 63     processing = []
 64     
 65     
 66     def streamWatcher(queue, stream):
 67         #print('streamWatcher-stdout\n')
 68         for line in stream: #this will be blocked if no data in buffer.
 69             queue.put(line)
 70         if not stream.closed:
 71             stream.close()
 72         
 73         #queue.task_done()
 74     
 75     #def streamPrinter(stdout_Queue,stderr_Queue):
 76     def streamPrinter(queue,stream):
 77         '''printing messages in queue'''
 78         while(1):
 79             try:
 80                 line = queue.get(True,0.1)
 81             except Empty:
 82                 pass
 83             else:
 84                 queue.task_done()
 85                 stream.write(line)
 86         
 87     #Using a thread to do printing.
 88     threadPool = []
 89     
 90     t = Thread(target=streamPrinter,name='stdout_printer',args=(stdout_Queue,sys.stdout))
 91     t.daemon = True
 92     t.start()
 93     
 94     t = Thread(target=streamPrinter,name='stderr_printer',args=(stderr_Queue,sys.stderr))
 95     t.daemon = True
 96     t.start()
 97     #threadPool.append(t)
 98     
 99     while(1):
100         while cmdList and len(processing) < P.cpu_num:
101             #rediect stderr to STDOUT
102             #print(cmdList)
103             cmd=cmdList.pop(0)
104             p = Popen(cmd,stdout=PIPE,shell=True,stderr=PIPE,bufsize=1
105                       ,close_fds=True
106                       ,executable='/bin/bash'
107                       )
108             p.cmd = cmd
109             processing.append(p)
110             #Using threading to watch output stream
111             t =Thread(target=streamWatcher, name='stdoutWatcher',args=(stdout_Queue,p.stdout))
112             t.daemon = True
113             t.start()
114 
115             
116             t = Thread(target=streamWatcher, name='stderrWatcher',args=(stderr_Queue,p.stderr))
117             t.daemon = True
118             t.start()
119 
120             
121         #check output and run states.
122         for p in processing:
123             #checkOutput(p)
124             if checkDone(p):
125                 if checkSuccess(p):
126                     doneTask += 1
127                     processing.remove(p)
128                 else:
129                     sys.stderr.write('***Caution: there are commands exited unexpectedly , System exited!***\n')
130                     sys.stderr.write('Error Invoking Command:\n%s\n'%(p.cmd))
131                     
132                     #kill all the running process
133                     for x in processing:
134                         try:
135                             x.kill()
136                         except OSError:
137                             pass
138                         
139                     closeWriter()
140                     sys.exit(-1)
141         
142         if not processing and not cmdList:
143             break
144         else:
145             time.sleep(0.1)
146             
147     # waiting queue done
148     stdout_Queue.join()
149     stderr_Queue.join()
150     
151 
if __name__ == '__main__':
    # Exactly one argument is expected: the number of CPUs to use.
    if len(sys.argv) != 2:
        help()

    # A non-numeric, zero or negative value cannot be used as a parallelism
    # limit (zero would never start any command), so show the usage instead.
    try:
        cpu_num = int(sys.argv[1])
    except ValueError:
        cpu_num = 0
    if cpu_num < 1:
        help()
    P.cpu_num = cpu_num

    # One command per line of stdin; surrounding whitespace is stripped and
    # empty lines are skipped.
    cmds = []
    for line in sys.stdin:
        line = line.strip()
        if line:
            cmds.append(line)

    executeCommands(cmds)
165     
166     

 

 Points:

  1.  t.daemon = True # the thread dies when the program exits.

  2. stdout_Queue.join() # wait on the Queue until everything has been processed.

  3. stdout_Queue.task_done() # signals to the Queue that the job is done.

  4. Queue.get([block[, timeout]]) Remove and return an item from the queue. If optional args block is true and timeout is None (the default), block if necessary until an item is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Empty exception if no item was available within that time. Otherwise (block is false), return an item if one is immediately available, else raise the Empty exception (timeout is ignored in that case).

  5. Queue.put(item[, block[, timeout]]) Put item into the queue. If optional args block is true and timeout is None (the default), block if necessary until a free slot is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Full exception if no free slot was available within that time. Otherwise (block is false), put an item on the queue if a free slot is immediately available, else raise the Full exception (timeout is ignored in that case).

 

posted @ 2013-07-28 16:30  wavefancy  阅读(364)  评论(0)    收藏  举报