初步分析美国科研项目申报项目
将XML转换为新格式的XML、excel、和SQlserver数据
package hander;
/**
* @author ZTX * @function:读取XML中字段到excel中
* @Configuration:将文件放在 Xml\data下,或者data改为您的数据文件夹置于 Xml\下
* @time:2013年11月12日19:37:45--2013年11月12日21:48:22--2013年11月13日17:28:45--2013年11月13日21:00:45
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.dom4j.*;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.apache.poi.*;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
public class Read {
public static String inputXmlDir;//要处理的xml文件的文件夹
public static String data="2006";//要处理的xml文件的文件夹名称默认是data
public static boolean head=false;//表头是否写了
public static String pathPrefix="";//文件路径前缀
public static long recordNum=0;//记录批量
public static void main(String args[]){
PreparedStatement ppst = null ;
ConnectSQLserver mssq=new ConnectSQLserver(true);//连sqlserver
//建表
String reCreWord=
"create table Project"+
" "+
"( "+
"AwardTitle varchar(200) null, "+
"Time varchar(1000) null, "+
"Organization varchar(200) null, "+
"ProgramOfficer varchar(100) null, "+
"AbstractNarration varchar(8000) null, "+
"AwardID varchar(10) null, "+
"PrincipalInvestigator varchar(60) null, "+
"EmailAddress varchar(40) null, "+
"Name varchar(80) null, "+
"CountryName varchar(60) null, "+
"StateName varchar(60) null, "+
"CityName varchar(60) null, "+
"ProgramReference varchar(500) null, "+
") ";
//插入
String inSQL="insert into Project(" +
"AwardTitle,Time,Organization," +
"ProgramOfficer,AbstractNarration," +
"AwardID,PrincipalInvestigator,EmailAddress," +
"Name,CountryName,StateName," +
"CityName,ProgramReference)values(?,?,?,?,?,?,?,?,?,?,?,?,?);";
try {
mssq.sta.execute(reCreWord);
System.out.println("操作新表:"+inSQL);
ppst=mssq.conn.prepareStatement(inSQL);
} catch (SQLException e1) {
e1.printStackTrace();
}
// try {
// //mssq.conn.setAutoCommit(false);
// } catch (SQLException e1) {
// e1.printStackTrace();
// }
long start,end=0;
//创建空表
Document creDocument = DocumentHelper.createDocument();
//添加空字段
//项目标题
Element root= creDocument.addElement("root");
Element element[ ] = new Element[13];
element[0] = root.addElement("AwardTitle");
element[1] = root.addElement("Time");
element[2]= root.addElement("Organization");
element[3] = root.addElement("ProgramOfficer");
element[4]= root.addElement("AbstractNarration");
element[5]= root.addElement("AwardID");
element[6]= root.addElement("PrincipalInvestigator");
element[7]= root.addElement("EmailAddress");
element[8] = root.addElement("Name");
element[9]= root.addElement("CountryName");
element[10] = root.addElement("StateName");
element[11]= root.addElement("CityName");
element[12]= root.addElement("ProgramReference");
SAXReader saxReader = new SAXReader();
//得到待处理的文件夹
//前缀
File getPrefix = new File("");// 空文件夹 用来得到 文件路径前缀
pathPrefix=getPrefix.getAbsolutePath();//得到 文件路径前缀
//组合
inputXmlDir=File.separator + data;
inputXmlDir=pathPrefix+inputXmlDir;//文件夹绝对路径
System.out.println(inputXmlDir);
File file = new File(data);
//遍历文件夹下的每个文件
if(file.isDirectory()){
File[] files = file.listFiles();
start=System.currentTimeMillis();
Document document;
for(int i = 0; i < files.length; i++){
System.out.println("第"+(i+1)+"篇");
try {
document = saxReader.read(inputXmlDir+File.separator + files[i].getName());
//拿出一个xml去处理
try {
String record[]=Handle( document,creDocument ,element);
//System.out.println("record[0]:"+record[0].toString());
for( int k=0;k<record.length;k++ ){
ppst.setString(k+1, record[k]);
//System.out.println(k);
// System.out.println(record[k]);
}
ppst.execute();
} catch (SQLException e) {
e.printStackTrace();
}
} catch (DocumentException e) {
e.printStackTrace();
}
}
end=System.currentTimeMillis()-start;
System.out.println("所用时间:"+end/1000+"s");
}
//输出xml结果文件
try{
XMLWriter output = new XMLWriter(
new FileWriter( new File(inputXmlDir+File.separator+"result.xml") ));
output.write( creDocument );
output.close();
}
catch(IOException e){
System.out.println(e.getMessage());
}
}
//处理一个xml
public static String[] Handle(Document document, Document creDocument, Element[] element) throws SQLException{
String [] record=new String [13];
//解析
Element root=document.getRootElement(); // 获取根节点
Iterator Award = root.elementIterator("Award"); //根节点#Award
// 遍历Award节点:
while (Award.hasNext()) {
Element AwardEle = (Element) Award.next();
//项目标题
String AwardTitle = AwardEle.elementTextTrim("AwardTitle"); // Award#AwardTitle
//添加到xml中
Element Title=element[0].addElement("Title");//AwardTitle的子标签
Title.setText(AwardTitle);
record[0]=AwardTitle;
//项目时间
String Time="";
String AwardEffectiveDate=AwardEle.elementTextTrim("AwardEffectiveDate"); // Award#AwardEffectiveDate
String AwardExpirationDate=AwardEle.elementTextTrim("AwardExpirationDate"); // Award#AwardExpirationDate
Time=AwardEffectiveDate +"-"+AwardExpirationDate;
Element time=element[1].addElement("time");
time.setText(Time);
record[1]=Time;
//组织
String org="";
Iterator Organization = AwardEle.elementIterator("Organization");//Award#Organization
// 遍历Organization节点:
while (Organization.hasNext()) {
Element OrganizationEle = (Element) Organization.next();
//组织合成
org= getChild(OrganizationEle,"Division","LongName")+">"+getChild(OrganizationEle,"Directorate","LongName");
}
Element organization=element[2].addElement("organization");
organization.setText(org);
record[2]=org;
//项目主任
String proOffice="";
proOffice=getChild(AwardEle,"ProgramOfficer","SignBlockName");
Element officers=element[3].addElement("officers");
officers.setText(proOffice);
record[3]=proOffice;
//摘要叙述
String abs="";
abs=getChild(AwardEle,"","AbstractNarration");
Element abstracts=element[4].addElement("abstracts");
abstracts.setText(abs);
record[4]=abs;
//项目编号
String Id="";
Id=getChild(AwardEle,"","AwardID");
Element id=element[5].addElement("id");
id.setText(Id);
record[5]=Id;
//主要研究者
String Inv="";
Inv=getChild(AwardEle,"Investigator","FirstName")+" "+getChild(AwardEle,"Investigator","LastName");
if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
Element investigator=element[6].addElement("investigator");
investigator.setText(Inv);
record[6]=Inv;
}
//主要研究者邮箱
String Em="";
Em=getChild(AwardEle,"Investigator","EmailAddress");
if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
Element email=element[7].addElement("email");
email.setText(Em);
record[7]=Em;
}
//机构名称
String Name="";
Name=getChild(AwardEle,"Institution","Name");
Element name=element[8].addElement("name");
name.setText(Name);
record[8]=Name;
//机构国家
String Nation="";
Nation=getChild(AwardEle,"Institution","CountryName");
Element nation=element[9].addElement("nation");
nation.setText(Nation);
record[9]=Nation;
//机构州
String State="";
State=getChild(AwardEle,"Institution","StateName");
Element state=element[10].addElement("state");
state.setText(State);
record[10]=State;
//机构城市
String City="";
City=getChild(AwardEle,"Institution","CityName");
Element city=element[11].addElement("city");
city.setText(City);
record[11]=City;
//研究主题
String Ref="";
//(2)子节点的子元素
Iterator iters = AwardEle.elementIterator("ProgramReference");
//遍历mom父节点
while (iters.hasNext()){
Element itemEle = (Element) iters.next();
Ref+=itemEle.elementTextTrim("Text")+" ;"; // 拿到head下的子节点script下的字节点username的值
}
Element reference=element[12].addElement("reference");
reference.setText(Ref);
record[12]=Ref;
}
// ppst.addBatch();
// ppst.executeBatch();
// ppst.execute();
recordNum++;
if(!head){//写表头
String heads[]=new String [13];
heads[0]="项目标题";
heads[1]="项目时间";
heads[2]="项目主任";
heads[3]="组织";
heads[4]="摘要叙述";
heads[5]="项目编号";
heads[6]="主要研究者";
heads[7]="主要研究者邮箱";
heads[8]="机构名称";
heads[9]="机构国家";
heads[10]="机构州";
heads[11]="机构城市";
heads[12]="研究主题";
//writeResult2Excel(heads);
head=true;
}
//writeResult2Excel(record);//数据量大就没用
return record;
}
//写入excel一行
public static void writeResult2Excel(String [] record) {
String DB=pathPrefix+File.separator+"data/results.xls";
HSSFWorkbook wb = getDBwb(DB);//据已有的DB 建立新HSSFWorkbook对象
HSSFSheet sheet = wb.getSheet("sheet1");//得到新的sheet对象
int rowNum=sheet.getLastRowNum();//获得总行数
int coloumNum=13;//总列数
HSSFRow rowTo =null;
HSSFCell cellTo =null;
rowTo = sheet.createRow(rowNum+1);
for(int j=0;j<coloumNum;j++){//列循环
cellTo=rowTo.createCell(j);
cellTo.setCellValue(record[j]);
}
//要把结果写到文件中去
writeBack2Excel(DB,wb);
}//writeBack
public static HSSFWorkbook getDBwb(String getDB) {
HSSFWorkbook workbook=null;
try {
FileInputStream fis=new FileInputStream(getDB);
workbook=new HSSFWorkbook(fis);
} catch (IOException e) {
e.printStackTrace();
}
return workbook;
}
public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
FileOutputStream fileOut;
try {
fileOut = new FileOutputStream(DBurl);
wb.write(fileOut);//把Workbook对象输出到文件中
fileOut.flush();
fileOut.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
private static String getChild(Element AwardEle,String mom,String kid) {
String target="";
//(1)直接输出的是根节点的子元素
if(mom.equals("")){
target= AwardEle.elementTextTrim(kid);
}
//(2)子节点的子元素
Iterator iters = AwardEle.elementIterator(mom);
//遍历mom父节点
while (iters.hasNext()){
Element itemEle = (Element) iters.next();
target= itemEle.elementTextTrim(kid); // 拿到head下的子节点script下的字节点username的值
}
return target;
}
}

浙公网安备 33010602011771号