sqlldr、sqluldr2_w64案例

参考链接:http://blog.itpub.net/10951282/viewspace-757712/

 sqluldr2大数据经常有bad数据(容错性太差),还是有点不太好用

查看帮助D:\oracle\sqluldr2> sqluldr2.exe

Valid Keywords:
user = username/password@tnsname
sql = SQL file name
query = select statement
field = separator string between fields
record = separator string between records
rows = print progress for every given rows (default, 1000000)
file = output file name(default: uldrdata.txt)
log = log file name, prefix with + to append mode
fast = auto tuning the session level parameters(YES)
text = output type (MYSQL, CSV, MYSQLINS, ORACLEINS, FORM, SEARCH)
charset = character set name of the target database.
ncharset= national character set name of the target database.
parfile = read command option from parameter file

for field and record, you can use '0x' to specify hex character code,
\r=0x0d \n=0x0a |=0x7c ,=0x2c, \t=0x09, :=0x3a, #=0x23, "=0x22 '=0x27

导出

D:\oracle\sqluldr2> sqluldr2_w64 scott/s123 query="select * from qmcb_ls_100" table=qmcb_ls_100 head=yes file=d:\oracle\sqluldr2\qmcb_ls_100.csv

D:\oracle\sqluldr2> sqluldr2_w64 scott/s123@xxx:1521/xxx sql=d:\oracle\sqluldr2\qmcb_ls.sql table=qmcb_ls head=yes file=d:\oracle\sqluldr2\qmcb_ls.csv

并行

--导出数据及控制文件,parallel那个数字亲测没啥用

D:\oracle\sqluldr2> sqluldr2_w64.exe USER=test/test@orcl QUERY="select /*+ parallel(2) */  * from AA10 where rownum<10" table=AA10 head=yes FILE=d:\oracle\sqluldr2\AA10.csv

sqlldr test/test@orcl control=D:\oracle\sqluldr2\AA10.ctl

当sqluldr2有table选项会默认生成ctl文件,以用于导入

 

导入

C:\Users\epsoft>sqlldr userid=scott/s123 control=d:\oracle\sqluldr2\qmcb_ls_100.ctl data=d:\oracle\sqluldr2\qmcb_ls_100.csv direct=true parallel=true

sqlldr jms/jms@tiod control=ent_person_sqlldr.ctl log=ent_person_sqlldr.log bad=ent_person_sqlldr_bad.log skip=1  errors=5000 rows=5000 bindsize=335542

 

当加载大量数据时(大约超过10GB),最好抑制日志的产生, 这样不产生REDO LOG,可以提高效率。

SQL>ALTER TABLE RESULTXT nologging;

然后在 CONTROL 文件中 load data 上面加一行:unrecoverable此选项必须要与 DIRECT 共同应用

parallel并不是让一个sqlldr语句起多个进程来加载数据,而是不锁住加载表,允许别的直接路径加载. 所以要使parallel起作用,应该先将要加载的数据文件分成多个,用多个sqlldr语句同时加载,如下例:

sqlldr userid=/ control=result1.ctl direct=true parallel=true 
sqlldr userid=/ control=result2.ctl direct=true parallel=true

 

sqlldr导入之前数据库需先建表


连接方式
sqlldr user/pwd control=webaccess.ctl
sqlldr test/test@orcl control=D:\oracle\sqluldr2\users.ctl
sqlldr mh/mh@11.11.11.11:1521/ora10 control=fund_inf.ctl

案例

--ctl中默认记录间的分割符是回车换行符,当记录中含回车换行符时,可运用str属性指定记录分隔符:infile test.dat "str '|\r\n"

--设置字符集:UTF8、AL32UTF8、ZHS16GBK
--select * from v$nls_parameters;
--select name, value$ from sys.props$ where name like 'NLS%';
OPTIONS (skip=1,rows=128)
LOAD DATA  
CHARACTERSET ZHS16GBK  --即本行代码为设置字符集
INFILE "D:\oracle\sqluldr2\users_data.csv"
INFILE "D:\oracle\sqluldr2\users_data1.csv"
INTO TABLE users_t TRUNCATE
FIELDS TERMINATED BY ","
--OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
  virtual_column FILLER,
  user_id,
  user_name,
  login_times,
  last_login DATE "YYYY-MM-DD HH24:MI:SS"
)



--第二行还是导不进去,不知道为啥..连续分隔符?
--数据类型:DECIMAL EXTERNAL,INTEGER EXTERNAL,CHAR
--number会报错, 不加INTEGER EXTERNAL也可以导入成功
OPTIONS (skip=1,rows=128)
LOAD DATA  
CHARACTERSET UTF8  	--即本行代码为设置字符集
INFILE *
INTO TABLE users_t TRUNCATE
FIELDS TERMINATED BY x'09'
OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
  virtual_column FILLER,
  user_id INTEGER EXTERNAL,
  user_name "case when length(:user_name)=0 then null else :user_name end",  --必须用双引号括起
  --user_name NULLIF user_name = BLANKS,
  login_times INTEGER EXTERNAL,
  last_login DATE "YYYY-MM-DD HH24:MI:SS"
)
BEGINDATA
	USER_ID	USER_NAME	LOGIN_TIMES	LAST_LOGIN
1	1	Unmi	3	2020/11/16 17:00
2	2		5	2008/10/15
3	3	"隔叶	黄莺"	8	2009/1/2
4	4	Kypfos		
5	5	不知秋 	1	2008/12/23



--infile后根文件名默认扩展名为'.dat'
--不指定列类型时,默认为char
--操作类型:replace, insert(默认), truncate, append
LOAD DATA 
INFILE *
INTO TABLE dept1 REPLACE
FIELDS TERMINATED BY x'09'
(
	deptno,
	dname,
	loc
)
BEGINDATA
10	sales	Virginia
20	accounting	Virginia



--处理定长数据
LOAD DATA 
INFILE 'account.dat'    --数据同控制文件在一起时,用infile *
INTO TABLE count_trans append
WHERE year='1990'
(
	account_nbr 	position(01:10) character,
	day 			position(11:12) character,
	month 			position(13:14) character,
	transation_code position(15:16) character,
	credit_amount 	position(17:30) character
)
INTO TABLE count REPLACE
WHERE year>'1990'
(
	account_nbr 	position(01:10) character,
	day 			position(11:12) character,
	month 			position(13:14) character,
	transation_code position(15:16) character,
	credit_amount 	position(17:30) character
)



--处理变长数据
--可为列单独指定分隔符
--可以写多个 INFILE "another_data_file.csv" 指定多个外部数据文件
--还可以使用 BADFILE、DISCARDFILE 来指定坏数据和丢弃数据的文件,
--infile 'accounts' discardfile mtidsc.rec badfile mthad.rec
--control选项:skip=跳过行数,rows=多少条提交一次(默认64),errors=允许的错误记录数
--log=xx.log,bad=xx.bad,data=xxx(一般通过infile指定)
--log 记录日志文件,默认控制文件名去掉ctl,加log
--bad 坏数据文件,默认控制文件名去掉ctl,加badOPTIONS (skip=1,rows=128)
LOAD DATA 
INFILE 'customer.dat'
INTO TABLE aa append 
WHERE year='1990'
(
	customer_id 	char terminated by '',
	status 			char terminated by '',
	dsc_class 		char enclosed by '',
	source 			char terminated by whitespace
)




--可对列进行操作
LOAD DATA 
INFILE *
INTO TABLE dept1 REPLACE
FIELDS TERMINATED BY ','
(
	deptno,
	dname	"upper(:dname)",
	loc		"upper(:loc)",
	last_updated	date 'dd/mm/yyyy'
)
BEGINDATA
10,sales,Virginia,1/5/20000
20,accounting,Virginia,21/6/1999



--trailing nullcols 字段没有对应的值时允许为空
--可字段拼接
LOAD DATA
INFILE *
INTO TABLE DEPT REPLACE
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
	deptno,
	dname	"upper(:dname)",
	loc		"upper(:loc)",
	last_updated	date 'dd/mm/yyyy',
	enter_line ":deptno||:dname||:loc||:last_updated"   --字段拼接
)
BEGINDATA
10,Sales,Virginia,1/5/2000
20,Accounting,Virginia,21/6/1999
30,Consulting,Virginia,5/1/2000
40,Finance,Virginia,15/3/2001


--可条件赋值
LOAD DATA
INFILE *
INTO TABLE DEPT REPLACE
FIELDS TERMINATED BY ','
TRAILING NULLCOLS
(
	deptno,
	dname	"upper(:dname)",
	loc		"upper(:loc)",
	last_updated	"case when length(:last_updated)<=10 then to_date(:last_updated, 'dd/mm/yyyy')
						else to_date(:last_updated, 'dd/mm/yyyy hh24:mi:ss') end"
)
BEGINDATA
10,Sales,Virginia,1/5/2000 12:03:03
20,Accounting,Virginia,21/6/1999
30,Consulting,Virginia,5/1/2000 01:23:00
40,Finance,Virginia,15/3/2001


--对于如下数据我们需自定义日期处理函数
BEGINDATA
10,Sales,Virginia,01-april-2001
20,Accounting,Virginia,13/04/2001
30,Consulting,Virginia,14/04/2001 12:02:02
40,Finance,Virginia,987268297
50,Finance,Virginia,02-apr-2001
60,Finance,Virginia,Not a date

--------
create or replace function my_to_date(p_string in varchar2) return date
as
	type fmtArray is table or varchar2(25);
	l_fmts fmtArray := fmtArray('dd-mon-yyyy','dd-month-yyyy',
								'dd/mm/yyyy','dd/mm/yyyy hh24:mi:ss');
	l_return date;
	
	begin
		for i in 1 .. l_fmts.count loop
			begin 
				l_return := to_date(p_string, l_fmts(i))
				exception when others then null;
			end;
			EXIT when l_return is not null;
		end loop;
		
		if(l_return is null) then
			l_return := new_time(to_date('01011970','ddmmyyyy') + 1/24/60/60*p_string, 'GMT', 'EST')
		end if;
		return l_return;
	end;



--可以指定filler列将其不导入数据库
--Optionally enclosed by '"' 表示数据中含“”括起来的字段,比如该字段中含分隔符等
--oracle处理部分必须用双引号括起来,orcle语法单引号表示字符串,防止冲突
LOAD DATA 
INFILE * 
INTO TABLE dept1 replace
FIELDS TERMINATED BY ',' 
OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
	deptno,
	dname	"upper(:dname)",
	loc		"upper(:loc)",
	last_updated	"case when length(:last_updated)<=10 then to_date(:last_updated, 'dd/mm/yyyy')
						else to_date(:last_updated, 'dd/mm/yyyy hh24:mi:ss') end"
)
BEGINDATA
20,Something Not To BE Loaded,accounting,"Virginia,USA"



--sqlldr默认char类型输入字节流最大长度为255,超过需指定 char(N),对于较短数据也最好指定长度
OPTIONS (skip=1,rows=128)
LOAD DATA 
INFILE *
INTO TABLE users append 
WHEN LOGIN_TIMES<>'8'
FIELDS TERMINATED BY "," 
TRAILING NULLCOLS
( 
	virtual_column FILLER,         
	--prod_id  char(32)  "trim(:prod_id)",
	--acc_num  char(20)  "replace(:acc_num,chr(13),'')"
	user_id "user_seq.nextval",     --这一列直接取序列的下一值,而不用数据中提供的值 
	user_name "'Hi '||upper(:user_name)",    -- 还能用SQL函数或运算对数据进行加工处理 
	login_times terminated by "," NULLIF(login_times='NULL'),  --可为列单独指定分隔符 
	last_login DATE "YYYY-MM-DD HH24:MI:SS" NULLIF(last_login="NULL")   -- 当字段为"NULL"时就是 NULL 
) 
BEGINDATA
  ,USER_ID,USER_NAME,LOGIN_TIMES,LAST_LOGIN 
1,1,Unmi,3,2009-1-5 20:34 
2,2,Fantasia,5,2008-10-15 
3,3,隔叶黄莺,8,2009-1-2 
4,4,Kypfos,NULL,NULL 
5,5,不知秋,1,2008-12-23



连接方式:
sqlldr user/pwd control=webaccess.ctl
sqlldr test/test@orcl control=D:\oracle\sqluldr2\users.ctl
sqlldr mh/mh@22.11.97.96:1521/ora10 control=fund_inf.ctl



--查看TERMINATED BY=x'09'和WHITESPACE的结果,WHITESPACE加载正常
LOAD DATA
INFILE *
INTO TABLE DEPT2
INSERT
FIELDS TERMINATED BY WHITESPACE
(DEPTNO, DNAME, LOC char(1000))
BEGINDATA
10              Sales          Virginia
20              Accounting     Virginia
30              Consulting     Virginia
40              Finance        Virginia    


--也可以
LOAD DATA
INFILE *
INTO TABLE DEPT2
INSERT
FIELDS TERMINATED BY WHITESPACE
(DEPTNO, ad filler, DNAME, a2 filler, LOC char(1000))
BEGINDATA
10 Sales Virginia
20 Accounting Virginia
30 Consulting Virginia
40 Finance Virginia




--可根据条件插入不同的表/分区,如一次装载所有分区,可通过装载表方式处理
into table account_trans
--into table sale partition(east_data)
when day between '01' and '31'
into table account_nbr
when account_type between 'aa' and 'zz'



--过滤开头#号的行
INTO TABLE <TABLE_NAME>
WHEN (1) <> '#'
--CONTINUEIF NEXT(1:1) = '#'

  

  

posted on 2020-11-19 15:28  iUpoint  阅读(409)  评论(0编辑  收藏  举报

导航