【代码片段】Python 调用 shell

#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import tempfile
import sys
import os
import subprocess
import logging
import re

# Names exported by ``from <module> import *``. Note that drop_table and
# get_table_info are defined in this file but are not listed here.
__all__ = ["run", "query", "get_feature_name",  "runfile"]

# Platform defaults: the codec used to decode odpscmd output and the name of
# the odpscmd executable.
if os.name == 'nt':
    m_code = 'gbk'
    odpscmd = 'odpscmd.bat'
else:
    m_code = 'utf-8'
    odpscmd = "odpscmd"

# Optional environment overrides: ODPSCMD replaces the executable when it is
# set to a non-empty value; ODPS_CONF is passed along as --config=<value>.
ODPSCMD = os.getenv('ODPSCMD')
ODPS_CONF = os.getenv('ODPS_CONF')
odpscmd = ODPSCMD or odpscmd


def drop_table(table):
    """Issue ``drop table if exists <table>;`` through :func:`run`.

    Returns whatever :func:`run` returns for that statement.
    """
    return run("drop table if exists %s;" % table)


def run(sql, log_file=None):
    """Execute a SQL string with ``odpscmd -e`` and return ``_run``'s result.

    The statement is flattened to one line first: on every input line only
    the text before the first ``--`` is kept, and the pieces are joined with
    single spaces. Note that a ``--`` inside a quoted literal is cut too.

    Args:
        sql: the SQL text, possibly spanning several lines.
        log_file: forwarded unchanged to ``_run``.
    """
    logging.debug(sql)
    kept_parts = [text.split('--')[0] for text in sql.splitlines()]
    flattened = ' '.join(kept_parts)
    return _run([odpscmd, "-e", flattened], log_file)


def runfile(filename, log_file=None):
    """Execute a script file with ``odpscmd -f`` and return ``_run``'s result.

    Args:
        filename: path handed to odpscmd after ``-f``.
        log_file: forwarded unchanged to ``_run``.
    """
    logging.debug("run file: "+filename)
    return _run([odpscmd, "-f", filename], log_file)


def _run(cmd, log_file):
    """Run a prepared odpscmd command line and report whether it succeeded.

    Args:
        cmd: argument list for ``subprocess.Popen``. It is copied, so the
            caller's list is left untouched.
        log_file: path of a file to append the child's stdout and stderr to,
            or ``None`` to send both to a throw-away temporary file.

    Returns:
        ``True`` when the process exits with status 0, ``False`` otherwise
        (a warning carrying the exit status is logged in that case).

    Raises:
        OSError: propagated from ``open`` or ``subprocess.Popen`` (for
            example when the executable cannot be started).
    """
    # Work on a copy so appending --config does not mutate the caller's list.
    cmd = list(cmd)
    if ODPS_CONF:
        cmd.append('--config=%s' % ODPS_CONF)

    # The child always writes to a real file object rather than to
    # sys.stdout/sys.stderr. The original note here read "fix bug for
    # jupyter" -- presumably notebook streams cannot be handed to Popen;
    # TODO confirm.
    sub_log = open(log_file, "a") if log_file is not None \
        else tempfile.TemporaryFile()

    # The with-block closes the log even if Popen or wait raises.
    with sub_log:
        proc = subprocess.Popen(cmd, stdout=sub_log, stderr=sub_log)
        code = proc.wait()

    if code != 0:
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning("Inside error: %d, skiped job", code)
        return False
    return True


def query(sql, log_file=None):
    """Run a statement with ``odpscmd -e`` and parse what it prints on stdout.

    Carriage returns are removed from ``sql`` and newlines are replaced by
    spaces before it is passed to odpscmd. The child's stderr is appended to
    ``log_file``, or sent to a throw-away temporary file when ``log_file`` is
    ``None``.

    The parsing mode depends on how ``sql`` starts (case-sensitive, no
    leading whitespace allowed):

    * ``show...``: every non-empty output line containing ``:`` contributes
      the text between its first and second colon; lines without a colon
      are skipped.
    * ``count...``: the first non-empty output line is returned as an ``int``.
    * anything else: lines that contain ``|`` and do not start with ``+``
      are split on ``|`` (surrounding spaces dropped) and the cells between
      the outer bars are collected as a list.

    Args:
        sql: the statement to execute.
        log_file: optional path of a log file opened in append mode.

    Returns:
        An ``int`` for ``count`` statements; otherwise a list (of strings for
        ``show``, of cell lists for tabular output). An empty list is
        returned when the process exits with a non-zero status.

    Raises:
        OSError: propagated from ``open`` or ``subprocess.Popen``.
        ValueError: when a ``count`` result line is not an integer.
    """
    logging.debug(sql)
    pp = re.compile(r' *\| *')
    cmd = [odpscmd, "-e", sql.replace('\r', '').replace('\n', ' ')]
    if ODPS_CONF:
        cmd.append('--config=%s' % ODPS_CONF)
    sub_log = open(log_file, "a") if log_file is not None \
        else tempfile.TemporaryFile()

    res = []
    # The with-block closes the log on every exit path, including the early
    # return for ``count`` and any exception raised while parsing.
    with sub_log:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=sub_log)
        try:
            for line in proc.stdout:
                line = line.strip().decode(m_code)   # python3
                if not line:
                    continue
                if sql.startswith('show'):
                    # Skip lines without a colon instead of raising
                    # IndexError on them.
                    if ':' in line:
                        res.append(line.split(':')[1])
                    continue
                if sql.startswith('count'):
                    return int(line)
                if line.startswith('+') or '|' not in line:
                    continue
                res.append(pp.split(line)[1:-1])
        finally:
            # Always release the pipe and reap the child so that neither an
            # early return nor an error leaves a dangling process behind.
            proc.stdout.close()
            code = proc.wait()

    if code != 0:
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning("Inside error: %d, skiped job", code)
        return []
    return res


def get_table_info(t_name, log_file=None):
    """Return ``(field, type, comment)`` tuples parsed from ``desc <t_name>;``.

    The rows returned by :func:`query` are scanned for a header row, i.e. a
    row of at least three cells that contains both ``Field`` and ``Type``.
    The positions of ``Field``, ``Type`` and ``Comment`` in that header select
    the cells taken from each following row. Collection stops at the first
    later row that has fewer than three cells.

    Args:
        t_name: table name interpolated into the ``desc`` statement.
        log_file: forwarded unchanged to :func:`query`.

    Returns:
        A list of 3-tuples; empty when no header row is found.
    """
    rows = query("desc %s;" % t_name, log_file)
    columns = None  # (Field, Type, Comment) positions once a header is seen
    features = []
    for row in rows:
        is_header = len(row) >= 3 and 'Field' in row and 'Type' in row
        if is_header:
            columns = (row.index('Field'),
                       row.index('Type'),
                       row.index('Comment'))
            continue
        if columns is None:
            # Still before the header: nothing to collect yet.
            continue
        if len(row) < 3:
            break
        features.append(tuple(row[pos] for pos in columns))
    return features


def get_feature_name(t_name, log_file=None):
    """Return the field names of table ``t_name`` as a tuple.

    The names are the first element of every tuple produced by
    :func:`get_table_info`.

    Args:
        t_name: table name passed to :func:`get_table_info`.
        log_file: forwarded unchanged to :func:`get_table_info`.

    Returns:
        A tuple of field names. It is empty when no table info is available;
        the previous ``list(zip(*rows))[0]`` raised ``IndexError`` then.
    """
    return tuple(info[0] for info in get_table_info(t_name, log_file))


if __name__ == "__main__":
    # Manual smoke test: log to odps.log, then print the first three cells of
    # the rows read from a table.
    logging.basicConfig(
        filename='odps.log',
        filemode='w',
        level=logging.DEBUG,
        datefmt='%y-%m-%d %H:%M:%S',
        format='[%(asctime)s] %(levelname)s (%(filename)s:%(lineno)d) %(message)s')

    # NOTE(review): the result of this query is discarded right below in
    # favour of a hard-coded table list -- looks like a debugging leftover.
    tables = query("show tables 's_35926_*';", 'xx.log')
    tables = ['s_35926_report02']
    for table_name in tables:
        print(table_name)
        rows = query('read %s 10;' % table_name, 'xx.log')
        # The first parsed row is skipped; shorter rows are ignored.
        for cells in rows[1:]:
            if len(cells) > 2:
                print(table_name.split('_')[2]+':'+cells[0]+','+cells[1]+','+cells[2])
posted @ 2024-03-12 10:37  bregman  阅读(3)  评论(0)  编辑  收藏  举报