建表语句:

CREATE  TABLE web_base(
  uri string, 
  ref string, 
  ip string, 
  uid string, 
  sid string, 
  loc string, 
  snapid string, 
  time bigint, 
  editor string, 
  ch1 string, 
  ch2 string, 
  ch3 string, 
  ch4 string, 
  uridomain string, 
  refdomain string, 
  reftype string, 
  ci string, 
  ci2 string, 
  plat string)
PARTITIONED BY ( 
  dt string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY '\t' 
  LINES TERMINATED BY '\n' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.RCFileInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
LOCATION
  'hdfs://nameservice1/user/hive/warehouse/web_base'

 

错误:

from
(
from
(
from
(
select uri, from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd+HH:mm:ss') as ct from web_base where dt = '2015-03-07' and from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd HH:mm:ss') is not null and uri like '%12369.ifeng.com%') a select uri, ct where instr(ct, '2015-03-07') <> 0) b select uri, split(split(ct, '+')[1], ':')[0] as ct_hour) c select count(*), ct_hour group by ct_hour;

split(string str, string patten)

第二个参数支持正则,所以“+”号需要转意;

正确:

from
(
from
(
from
(
select uri, from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd+HH:mm:ss') as ct from web_base where dt = '2015-03-07' and from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd HH:mm:ss') is not null and uri like '%12369.ifeng.com%') a select uri, ct where instr(ct, '2015-03-07') <> 0) b select uri, split(split(ct, '\\+')[1], ':')[0] as ct_hour) c select count(*), ct_hour group by ct_hour;

 计算一段时间趋势,group by day, hour即可:

from
(
from
(
from
(
select uri, from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd+HH:mm:ss') as ct from web_base where dt between '2015-03-07' and '2015-03-10' and from_unixtime(cast(substr(time, 1, 10) as bigint), 'yyyy-MM-dd HH:mm:ss') is not null and uri like '%12369.ifeng.com%') a select uri, ct where instr(ct, '2015-03-07') <> 0) b select uri, split(ct, '\\+')[0] as ct_day, split(split(ct, '\\+')[1], ':')[0] as ct_hour) c select count(*), ct_day, ct_hour group by ct_day, ct_hour;

 

posted on 2015-03-23 18:04  闪电战  阅读(331)  评论(0)    收藏  举报