MaxCompute UDTF读取MaxCompute资源

1、新建资源Python，输入资源名称py_udtf_example.py，并输入或者粘贴代码，然后保存并提交。

# -*- coding: utf-8 -*-

from odps.udf import annotate

from odps.udf import BaseUDTF

from odps.distcache import get_cache_file

from odps.distcache import get_cache_table

@annotate('string -> string, bigint')
class UDTFExample(BaseUDTF):
    """UDTF that expands a pageid into one (pageid, adid) row per adid.

    The pageid -> adid-list mapping is assembled once, in ``__init__``,
    from two MaxCompute resources: the file resource ``test_json.txt`` and
    the table resource ``table_resource1``.
    """

    def __init__(self):
        """Load both resources and build ``self.my_dict``.

        ``test_json.txt`` is parsed as JSON and becomes the initial mapping.
        Rows of ``table_resource1`` are then read with column 0 as the
        pageid and column 1 as the adid. A pageid present in the table
        replaces any entry for the same pageid that came from the file.
        """
        import json

        cache_file = get_cache_file('test_json.txt')
        try:
            self.my_dict = json.load(cache_file)
        finally:
            # Release the handle even when the resource is not valid JSON.
            cache_file.close()

        # Group the table rows per pageid before merging, so that several
        # rows sharing one pageid keep all of their adids instead of the
        # last row silently overwriting the earlier ones.
        table_adids = {}
        for record in get_cache_table('table_resource1'):
            table_adids.setdefault(record[0], []).append(record[1])

        # Table entries take precedence over file entries for the same key.
        self.my_dict.update(table_adids)

    def process(self, pageid):
        """Emit ``(pageid, adid)`` for every adid mapped to ``pageid``.

        A pageid that has no entry in the mapping produces no output rows
        rather than raising ``KeyError``, so a single unknown input value
        cannot abort the whole query.
        """
        for adid in self.my_dict.get(pageid, ()):
            self.forward(pageid, adid)

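2、新建资源File，输入资源名称：test_json.txt，并输入或者粘贴内容，然后保存并提交。文件内容须为一个JSON对象，键为pageid，值为该pageid对应的adid列表，例如：{"front_page": [1, 2, 3], "contact_page1": [3, 4, 5]}。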

3、通过ODPS SQL节点创建表。

-- Create the resource table table_resource1 and insert its rows.
CREATE TABLE IF NOT EXISTS table_resource1 (
    pageid STRING,
    adid   INT
);
INSERT INTO TABLE table_resource1 VALUES
    ("contact_page2", 2),
    ("contact_page3", 5);

-- Create the internal table tmp1 and insert its rows.
CREATE TABLE IF NOT EXISTS tmp1 (
    pageid STRING
);
INSERT INTO TABLE tmp1 VALUES
    ("front_page"),
    ("contact_page1"),
    ("contact_page3");

-- Register the table table_resource1 as a MaxCompute resource.
ADD TABLE table_resource1 AS table_resource1;