【原创】oracle中大数据量join操作的试验

通过关联订购关系这个操作做了一个关于join操作的试验。

以前采用上下行表直接关联,2个表数据量大约是2200w左右和1400w左右,并且2个表都是属于宽表,字段内容多,占用空间大,但join的时候用到的字段很少(2个左右),因此很多内存都耗在了存储不必要的字段值。每次关联操作耗时在2个小时以上。

通过细化join相关表后,首先减少了单表的数据元数目,并且只在细化表中只存储了join操作必须的字段。因此对订购关系这张表来说,数据量减少了70%以上。

  由于sga得到了充分利用,因此join的效率也数10倍的得到了提升。

下面是采用细化分表后在join的详细实施过程:

减少join表的数据量后的详细操作记录:

#Elapsed: (ORDER)00:02:25.63   Elapsed: 00:00:48.45    13916602
create table test.tmp_user_info
as
select user_num,service_id,SUBCHANNEL_CODE SUB_CHANNEL_CODE,CHANNEL_CODE,REG_MODE,UNREG_MODE,ENABLE_STATUS,
  FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME from mqq.t_wx_user_info2 where (last_unreg_time like '2007-02-05 %' and enable_status='N') or enable_status='Y'


#Elapsed: 00:06:38.53 22674664
create table test.tmp_oicall_info_20070206
tablespace tbs_sub
as
select t1.MISC_MSG_ID,
  t1.MSG_ID,
  t1.MSG_TYPE,
  t1.FEE_TYPE,
  t1.FEE_VALUE,
  t1.SEND_ADDRESS,
  t1.RECV_ADDRESS,
  t1.FEE_ADDRESS,
  t1.DOWN_STATION,
  t1.OUTTER_ID,
  t1.SERVICE_ID,
  t1.OSS_RECV_CODE,
  t1.MT_TYPE,
  t1.MT_CONTENT,
  t1.MT_TIME,
  t1.CARRY_MSG,
  t1.CARRY_ID,
  t1.RPT_FLAG,
  t1.RPT_STATE,
  t1.GATEWAY_RPT_STATE,
  t1.link_id,
  t1.err_detail from mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070206) t1
#Elapsed: 00:05:07.04


#Elapsed: 00:04:29.64 Elapsed: 00:04:34.68  (说明前面create table的时候,不需要order by)
create table test.tmp_sms_mt_info
as
select t1.user_num,t1.service_id,SUB_CHANNEL_CODE,t1.channel_code channel_code,REG_MODE,UNREG_MODE,ENABLE_STATUS,
  FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME
  from  test.tmp_user_info_2 t1,test.tmp_oicall_info t2 where t1.service_id=t2.service_id and t1.user_num=t2.recv_address
CREATE INDEX IDX_tmp_sms_mt_info ON test.tmp_sms_mt_info
(USER_NUM, service_id)
NOLOGGING
TABLESPACE TBS_SUB
PCTFREE    10
INITRANS   2
MAXTRANS   255
STORAGE    (
            INITIAL          64K
            MINEXTENTS       1
            MAXEXTENTS       2147483645
            PCTINCREASE      0
            BUFFER_POOL      DEFAULT
           )
NOPARALLEL;
///
select count(*) from mqqflow.t_sso_order_2007 partition(p_sso_order_200702) where stat_date=to_date('2007-02-07', 'YYYY-MM-DD')  2044 行
///
create table tmp_mt_sms
TABLESPACE TBS_STAT
PCTUSED    0
PCTFREE    10
INITRANS   1
MAXTRANS   255
STORAGE    (
            INITIAL          64K
            MINEXTENTS       1
            MAXEXTENTS       2147483645
            PCTINCREASE      0
            BUFFER_POOL      DEFAULT
           )
LOGGING
NOCOMPRESS
NOCACHE
NOPARALLEL
MONITORING
as
select
  t1.MISC_MSG_ID,
  t1.MSG_ID,
  t1.MSG_TYPE,
  t1.FEE_TYPE,
  t1.FEE_VALUE,
  t1.SEND_ADDRESS,
  t1.RECV_ADDRESS,
  t1.FEE_ADDRESS,
  t1.DOWN_STATION,
  t1.OUTTER_ID,
  t1.SERVICE_ID,
  t1.OSS_RECV_CODE,
  t1.MT_TYPE,
  t1.MT_CONTENT,
  t1.MT_TIME,
  t1.CARRY_MSG,
  t1.CARRY_ID,
  t2.channel_code,
  t2.SUB_CHANNEL_CODE,
  t2.user_num,
  t2.REG_MODE,
  t2.UNREG_MODE,
  t2.ENABLE_STATUS,
  t2.FIRST_REG_TIME,
  t2.LAST_REG_TIME,
  t2.LAST_UNREG_TIME,
  t1.RPT_FLAG,
  t1.RPT_STATE,
  t1.GATEWAY_RPT_STATE,
  t1.link_id,
  t1.err_detail
  from  mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070207)  t1
  left join
  test.tmp_sms_mt_info t2
  on t2.user_num = t1.recv_address
  and t1.service_id = t2.service_id

# Elapsed: 00:13:05.74
create table test. tmp_mt_sms
tablespace tbs_sub
as
  select
  t1.MISC_MSG_ID,
  t1.MSG_ID,
  t1.MSG_TYPE,
  t1.FEE_TYPE,
  t1.FEE_VALUE,
  t1.SEND_ADDRESS,
  t1.RECV_ADDRESS,
  t1.FEE_ADDRESS,
  t1.DOWN_STATION,
  t1.OUTTER_ID,
  t1.SERVICE_ID,
  t1.OSS_RECV_CODE,
  t1.MT_TYPE,
  t1.MT_CONTENT,
  t1.MT_TIME,
  t1.CARRY_MSG,
  t1.CARRY_ID,
  decode(t3.link_id,null,t2.channel_code,'WEB') channel_code,
  t2.SUB_CHANNEL_CODE,
  t2.REG_MODE,
  t2.UNREG_MODE,
  t2.ENABLE_STATUS,
  t2.FIRST_REG_TIME,
  t2.LAST_REG_TIME,
  t2.LAST_UNREG_TIME,
  t1.RPT_FLAG,
  t1.RPT_STATE,
  t1.GATEWAY_RPT_STATE,
  t1.link_id,
  t1.err_detail
  from test.tmp_oicall_info_20070206 t1
  left join test.tmp_sso_mt
  t3
  on t3.stat_date=to_date('2007-02-06', 'YYYY-MM-DD')
  and t3.link_id = t1.link_id
  left join
  test.tmp_user_info t2
  on t2.user_num = t1.RECV_ADDRESS
  and t1.service_id = t2.service_id

posted @ 2008-02-27 09:27  jacktu  阅读(2055)  评论(0编辑  收藏  举报