tianchi微博预测-目前进度

从9.29第二赛季开始评测,开放御膳房平台之后。已经过了去了两周。
十一在台风里转了一圈不算。
每个早上醒来都别具风味呐。

现在的成绩是59.2%,提交日期是昨晚
方法是按照评分规则,如果用户出现过最高档,则全部按照最高档预测。

上一周:入门SQL,基本能用select,join,case when等等;但是group是心头病呐。
这一周goal:学会用算法平台,尝试用随机森林。写一些特征看看能不能达到70%

代码如下

  ------------------------ 
     ---like+repo+comment = interact
     ------------------------
    drop table if exists b;
    create table b as
    select
        uid,
        count(action_type) as interact
    from tianchi_weibo.weibo_action_data_train
    group by uid;

    -----------------
    ---level point
    -----------------
    drop table if exists blevel;
    create table blevel as 
     select 
        uid,
        case 
            when b.interact>100 then b.interact*200
            when b.interact<=100 and b.interact>50 then b.interact*100
            when b.interact<=50 and b.interact>11 then b.interact*50
            when b.interact<=11 and b.interact>6 then b.interact*10
            when b.interact<=5 and b.interact>=0 then b.interact
            end as level
        from b;
    

    --------------------
    ----max level-------------
    ---------------------
    drop table if exists blevelmax;
    create table blevelmax as 
    select 
            uid,
            max(level) as max_level
        from blevel
    group by uid;
        
        
    -- drop table if exists blevelnum;
    -- create table blevelnum as 
    select 
            uid,
            case 
                when max_level>=20000 then max_level/200
                when max_level>=5000 and max_level<20000 then max_level/100
                when max_level>=550 and max_level<5000 then max_level/50
                when max_level>=60 and max_level<550 then max_level/10
                when max_level>=0 and max_level<60 then max_level
                end as levelpoint
        from blevelmax;
       
    
    -------------------
    --dummy all 0 -------
    -------------------
    drop table if exists x;
    create table x as
    select
        uid,
        mid,
        0 as levelpoint
    from tianchi_weibo.weibo_blog_data_test;


    ---------------------
    ---join dummy and predict-maxlevel ; clear NULL
    ---------------------
    drop table if exists weibo_rd_2_submit;
    create table weibo_rd_2_submit as 
        ------clear 0 from havenull
    select 
        havenull.uid,
        havenull.mid,
        case 
            when havenull.action_sum IS NULL then 0 
            when havenull.action_sum IS NOT NULL then cast(havenull.action_sum as bigint)
        end as action_sum
    from
            ------dummy0 inner join bsum 
        (
            select
                    x.uid,
                    x.mid,
                    y.action_sum as action_sum
            from
                    x
                    left outer join
                    (
                        select
                            blevelnum.uid,
                            blevelnum.levelpoint as action_sum
                        from blevelnum
                    )y
            on x.uid=y.uid
        )havenull;


    -----------
    ----show---
    ------------

    select * from weibo_rd_2_submit
    order by action_sum limit 2000;

posted @ 2015-10-12 18:22  dunfentiao  阅读(263)  评论(0)    收藏  举报