【代码片段】Python 调用 shell(通过 odpscmd 命令行执行 ODPS SQL)
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import tempfile
import sys
import os
import subprocess
import logging
import re
# Names exported by a star-import of this module.
__all__ = ["run", "query", "get_feature_name", "runfile"]

# Platform defaults: the codec used to decode odpscmd output in query()
# and the name of the odpscmd launcher.
if os.name == 'nt':
    m_code, odpscmd = 'gbk', 'odpscmd.bat'
else:
    m_code, odpscmd = 'utf-8', "odpscmd"

# Optional environment overrides:
#   ODPSCMD   - replaces the default launcher when set to a non-empty value
#   ODPS_CONF - passed to odpscmd as --config=<value> by _run() and query()
ODPSCMD = os.getenv('ODPSCMD')
ODPS_CONF = os.getenv('ODPS_CONF')
odpscmd = ODPSCMD or odpscmd
def drop_table(table):
    """Issue ``drop table if exists <table>;`` through run().

    Args:
        table: table name, interpolated into the statement as-is.

    Returns:
        Whatever run() returns for the statement.
    """
    return run("drop table if exists %s;" % table)
def run(sql, log_file=None):
    """Execute *sql* with ``odpscmd -e`` and report success.

    Before execution the statement is flattened to a single line: for every
    line only the text before the first ``--`` is kept (this drops SQL line
    comments, and also cuts any ``--`` that appears inside a string
    literal), and the remaining pieces are joined with single spaces.

    Args:
        sql: SQL text, possibly multi-line.
        log_file: forwarded to _run(); see that function.

    Returns:
        The result of _run() for the assembled command.
    """
    logging.debug(sql)
    kept_parts = [text_line.split('--')[0] for text_line in sql.splitlines()]
    flattened = ' '.join(kept_parts)
    return _run([odpscmd, "-e", flattened], log_file)
def runfile(filename, log_file=None):
    """Execute the script *filename* with ``odpscmd -f`` and report success.

    Args:
        filename: path handed to odpscmd's ``-f`` option.
        log_file: forwarded to _run(); see that function.

    Returns:
        The result of _run() for the assembled command.
    """
    message = "run file: " + filename
    logging.debug(message)
    return _run([odpscmd, "-f", filename], log_file)
def _run(cmd, log_file):
    """Run an odpscmd command line and report whether it succeeded.

    Args:
        cmd: argument list for the process. It is copied, so the caller's
            list is left untouched; ``--config=<ODPS_CONF>`` is appended to
            the copy when ODPS_CONF is set.
        log_file: path opened in append mode to receive the process's
            stdout and stderr. When None, the output goes to a temporary
            file instead of the interpreter's own streams (the original
            author's note: "fix bug for jupyter").

    Returns:
        True when the process exits with status 0, otherwise False (a
        warning with the exit status is logged).
    """
    full_cmd = list(cmd)
    if ODPS_CONF:
        full_cmd.append('--config=%s' % ODPS_CONF)
    sub_log = open(log_file, "a") if log_file is not None \
        else tempfile.TemporaryFile()
    # The context manager closes (and flushes) the log even if the process
    # cannot be started, e.g. when the odpscmd executable is missing.
    with sub_log:
        proc = subprocess.Popen(full_cmd, stdout=sub_log, stderr=sub_log)
        code = proc.wait()
    if 0 != code:
        # logging.warn is a deprecated alias that was removed in Python 3.13.
        logging.warning("Inside error: %d, skiped job", code)
        return False
    return True
def query(sql, log_file=None):
    """Run *sql* with ``odpscmd -e`` and parse what it prints on stdout.

    The statement is flattened to one line (carriage returns removed,
    newlines replaced by spaces). ``--config=<ODPS_CONF>`` is appended when
    ODPS_CONF is set. The process's stderr is appended to *log_file*, or
    written to a temporary file when *log_file* is None.

    Every stdout line is stripped and decoded with ``m_code``; empty lines
    are skipped. The remaining lines are handled according to how *sql*
    starts:

    * ``show``: the line is split on ':' and the second piece is collected.
    * ``count``: the first non-empty line is converted with ``int`` and
      returned immediately, without looking at the exit status.
    * anything else: lines that start with '+' or contain no '|' are
      skipped; other lines are split on '|' (with surrounding spaces), the
      first and last pieces are dropped, and the rest is collected as one
      row.

    Args:
        sql: statement to execute.
        log_file: optional path of a file that receives stderr (append).

    Returns:
        A list of strings (``show``), an int (``count``), or a list of rows
        where each row is a list of strings. An empty list is returned when
        the process exits with a non-zero status.

    Raises:
        IndexError: a ``show`` output line contains no ':'.
        ValueError: a ``count`` output line is not an integer.
    """
    logging.debug(sql)
    pp = re.compile(r' *\| *')
    cmd = [odpscmd, "-e", sql.replace('\r', '').replace('\n', ' ')]
    if ODPS_CONF:
        cmd.append('--config=%s' % ODPS_CONF)
    # The statement kind does not change while reading, so test it once.
    is_show = sql.startswith('show')
    is_count = sql.startswith('count')
    res = []
    sub_log = open(log_file, "a") if log_file is not None \
        else tempfile.TemporaryFile()
    with sub_log:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=sub_log)
        try:
            for raw in proc.stdout:
                line = raw.strip().decode(m_code)  # python3: the pipe yields bytes
                if not line:
                    continue
                if is_show:
                    res.append(line.split(':')[1])
                    continue
                if is_count:
                    return int(line)
                if line.startswith('+') or '|' not in line:
                    continue
                res.append(pp.split(line)[1:-1])
        finally:
            # Runs on the early ``count`` return and on parse errors as
            # well, so the pipe is closed and the child is always reaped.
            proc.stdout.close()
            code = proc.wait()
    if 0 != code:
        # logging.warn is a deprecated alias that was removed in Python 3.13.
        logging.warning("Inside error: %d, skiped job", code)
        return []
    return res
def get_table_info(t_name, log_file=None):
    """Collect (field, type, comment) triples from ``desc <t_name>;``.

    The rows returned by query() are scanned for a header row, i.e. a row
    with at least three cells that contains both 'Field' and 'Type'. The
    positions of 'Field', 'Type' and 'Comment' in that row select the cells
    taken from every following row. Rows before the first header are
    ignored, a later header row re-reads the positions, and scanning stops
    at the first non-header row with fewer than three cells.

    Args:
        t_name: table name interpolated into the ``desc`` statement.
        log_file: forwarded to query().

    Returns:
        A list of ``(field, type, comment)`` tuples; empty when no header
        row was found.

    Raises:
        ValueError: a header row has no 'Comment' cell.
    """
    rows = query("desc %s;" % t_name, log_file)
    columns = []
    field_pos = type_pos = comment_pos = 0
    in_schema = False
    for row in rows:
        is_header = len(row) >= 3 and 'Field' in row and 'Type' in row
        if is_header:
            in_schema = True
            field_pos = row.index('Field')
            type_pos = row.index('Type')
            comment_pos = row.index('Comment')
            continue
        if not in_schema:
            # Nothing is collected until a header row has been seen.
            continue
        if len(row) < 3:
            break
        columns.append((row[field_pos], row[type_pos], row[comment_pos]))
    return columns
def get_feature_name(t_name, log_file=None):
    """Return the field names of table *t_name*.

    Args:
        t_name: table name, forwarded to get_table_info().
        log_file: forwarded to get_table_info().

    Returns:
        A tuple with the first element (the field name) of every entry
        produced by get_table_info(). The tuple is empty when no table
        information is available (for example when the underlying query
        failed and produced no rows); indexing the transposed list in that
        situation used to raise IndexError.
    """
    return tuple(entry[0] for entry in get_table_info(t_name, log_file))
if __name__ == "__main__":
    # Demo: log DEBUG and above to odps.log (rewritten on every run), then
    # print up to three cells of the rows returned for each table.
    log_options = {
        'level': logging.DEBUG,
        'format': '[%(asctime)s] %(levelname)s (%(filename)s:%(lineno)d) %(message)s',
        'datefmt': '%y-%m-%d %H:%M:%S',
        'filename': 'odps.log',
        'filemode': 'w',
    }
    logging.basicConfig(**log_options)
    tables = query("show tables 's_35926_*';", 'xx.log')
    # NOTE(review): the listing fetched above is replaced right away by a
    # fixed table name - looks like a debugging leftover; confirm.
    tables = ['s_35926_report02']
    for table_name in tables:
        print(table_name)
        rows = query('read %s 10;' % table_name, 'xx.log')
        # The first parsed row is skipped, as are rows with fewer than three cells.
        for row in rows[1:]:
            if len(row) > 2:
                print(table_name.split('_')[2] + ':' + ','.join(row[:3]))
--- 她说,她是仙,她不是神