//逐行读取日志记录
package com.expai.test;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.expai.admin.util.logIn.InsertToDB;
import com.expai.admin.util.logIn.ParseString;
/**
 * Reads a log file line by line, converts every line into a column map with
 * {@code ParseString.everyLogRead}, derives a {@code shortUrl} column from the
 * row's {@code imageUrl}, and writes the rows to the database in batches via
 * {@code InsertToDB.insertInto}.
 *
 * <p>Rows that carry a {@code domainId} and rows that do not are batched
 * separately, so every batch is inserted with a column list that matches all
 * of its rows.
 */
public class ReadData {
    /** Table that receives every parsed row. */
    private static final String TABLE = "tb_advertise_appertime_detail";

    /** Number of rows collected before a batch is handed to the inserter. */
    private static final int BATCH_SIZE = 1000;

    /** File read by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_LOG_PATH =
            "C:/Users/Administrator/Desktop/adverview.log.20140630";

    /** Field names requested from the line parser. */
    private static final String[] LOG_FIELDS =
            {"imageUrl", "urlno", "keyword", "shortUrl", "ip", "visitTime", "domainId"};

    /** Columns written for rows without a domainId. */
    private static final String[] BASE_COLUMNS =
            {"ip", "address", "network", "imageUrl", "urlno", "visitTime", "shortUrl"};

    /** Columns written for rows that have a domainId. */
    private static final String[] DOMAIN_COLUMNS =
            {"ip", "address", "network", "imageUrl", "urlno", "visitTime", "shortUrl", "domainId"};

    /**
     * Matches a host label followed by one of the recognised suffixes.
     * ".com.cn" is listed before ".com" so that the longer suffix can actually
     * match; regex alternation picks the first alternative that succeeds.
     * Compiled once because it is applied to every line.
     */
    private static final Pattern DOMAIN_PATTERN = Pattern.compile(
            "[0-9a-zA-Z]+((\\.com\\.cn)|(\\.com)|(\\.cn)|(\\.org)|(\\.net)|(\\.edu))");

    /** Most recent row seen for each value of the {@code query} field. */
    private static Map<String, Map<String, Object>> hashmap =
            new HashMap<String, Map<String, Object>>();

    private InsertToDB it = new InsertToDB();
    private ParseString ps = new ParseString();

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} overrides the default log file path
     * @throws IOException declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_LOG_PATH;
        try {
            new ReadData().readFile(path, "urlno", null, null);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code filePath} (decoded as GBK) and inserts one table row per line.
     *
     * @param filePath path of the log file to read
     * @param query    name of the parsed field whose value keys the shared row map
     * @param parm     unused; kept so existing callers keep compiling
     * @param map      optional aliases: for every entry the row value stored under
     *                 the entry's key is also stored under the entry's value;
     *                 may be {@code null} or empty
     * @throws IOException if the file cannot be opened or read
     */
    public void readFile(String filePath, String query, String[] parm, Map<String, String> map)
            throws IOException {
        List<Map<String, Object>> plainRows = new ArrayList<Map<String, Object>>();
        List<Map<String, Object>> domainRows = new ArrayList<Map<String, Object>>();
        int count = 0;

        FileInputStream in = new FileInputStream(filePath);
        long startTime = System.currentTimeMillis(); // start of timing
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "gbk"));
            String strLine;
            while ((strLine = br.readLine()) != null) {
                Map<String, Object> row = ps.everyLogRead(strLine, LOG_FIELDS);
                // Decided before aliasing, exactly as the column choice was made originally.
                boolean hasDomainId = row.get("domainId") != null;

                applyAliases(row, map);
                rememberRow(row, query);
                row.put("shortUrl", extractShortUrl((String) row.get("imageUrl")));
                count++;

                if (hasDomainId) {
                    domainRows.add(row);
                    if (domainRows.size() >= BATCH_SIZE) {
                        flush(domainRows, DOMAIN_COLUMNS);
                        domainRows = new ArrayList<Map<String, Object>>();
                    }
                } else {
                    plainRows.add(row);
                    if (plainRows.size() >= BATCH_SIZE) {
                        flush(plainRows, BASE_COLUMNS);
                        plainRows = new ArrayList<Map<String, Object>>();
                    }
                }
            }
            // Write whatever is left over from the last, partial batches.
            flush(plainRows, BASE_COLUMNS);
            flush(domainRows, DOMAIN_COLUMNS);
        } finally {
            // Closing the underlying stream releases the file even when parsing
            // or inserting throws part-way through.
            in.close();
        }

        long endTime = System.currentTimeMillis(); // end of timing
        System.out.println("read line:" + count);
        System.out.println("cost time:" + (endTime - startTime) + "ms");
    }

    /** Copies each aliased value: row[alias.value] = row[alias.key]. */
    private static void applyAliases(Map<String, Object> row, Map<String, String> aliases) {
        if (aliases == null || aliases.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> alias : aliases.entrySet()) {
            row.put(alias.getValue(), row.get(alias.getKey()));
        }
    }

    /** Stores the row in the shared map under the value of its {@code query} field. */
    private static void rememberRow(Map<String, Object> row, String query) {
        hashmap.put((String) row.get(query), row);
        // NOTE(review): the original looked the entry up again right after the put,
        // so it always read back this same row; the net effect is only that the row
        // gains a "flagId" key holding its own current value. If the intent was to
        // inherit flagId from an earlier row with the same key, the lookup has to
        // happen before the put - confirm before changing.
        row.put("flagId", row.get("flagId"));
    }

    /**
     * Returns every domain-like match found in {@code imageUrl}, joined with ", ".
     * Yields an empty string when there is no match or the URL is missing.
     */
    private static String extractShortUrl(String imageUrl) {
        if (imageUrl == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        Matcher m = DOMAIN_PATTERN.matcher(imageUrl);
        while (m.find()) {
            if (joined.length() > 0) {
                joined.append(", ");
            }
            joined.append(m.group());
        }
        return joined.toString();
    }

    /** Inserts a non-empty batch with the given column list and reports the result. */
    private void flush(List<Map<String, Object>> rows, String[] columns) {
        if (rows.isEmpty()) {
            return;
        }
        Integer n = it.insertInto(rows, TABLE, columns);
        System.err.println(" excute sql " + n);
    }
}
/**
 * Builds one INSERT statement per row and submits all of them as a single
 * SQL script (callers pass batches of about 1000 rows).
 *
 * <p>Every value is written as a quoted string literal; a missing value is
 * written as the literal {@code 'null'}, as before. Single quotes inside a
 * value are doubled so that a quote in the data cannot terminate the literal.
 * NOTE(review): this is only a stop-gap - the statement is still assembled as
 * text, and backslash handling depends on the database in use. Switching to
 * parameterized statements is the proper fix, but needs an API on {@code dm}
 * that is not visible here.
 *
 * @param list  rows to insert; {@code null} entries are skipped
 * @param tab   target table name (inserted verbatim, must be trusted)
 * @param parms column names; also the keys used to read each row's values
 * @return the value reported by {@code dm.executUpdate}, or 0 when there was
 *         nothing to execute
 */
public Integer insertInto(List<Map<String,Object>> list,String tab,String[] parms){
    int count=0;
    boolean hasRows = list!=null && !list.isEmpty();
    boolean hasColumns = parms!=null && parms.length>0;
    if(hasRows && hasColumns){
        // The column list is identical for every row, so build it once.
        StringBuilder columns=new StringBuilder();
        for (int a=0;a<parms.length;a++) {
            if(a>0){
                columns.append(" , ");
            }
            columns.append(parms[a]);
        }

        // StringBuilder avoids re-copying the whole script for every row.
        StringBuilder sql=new StringBuilder();
        for (Map<String,Object> row : list) {
            if(row==null){
                continue;
            }
            sql.append(" insert into ").append(tab)
               .append(" ( ").append(columns).append(" ) values( ");
            for (int a=0;a<parms.length;a++) {
                if(a>0){
                    sql.append(" , ");
                }
                String value=String.valueOf(row.get(parms[a]));
                sql.append("'").append(value.replace("'", "''")).append("'");
            }
            sql.append(" ) ;").append("\n");
        }

        // Nothing to run when every row was null.
        if(sql.length()>0){
            count=dm.executUpdate(sql.toString());
        }
    }
    log.info("插入行数:"+count);
    return count;
}