初步分析美国科研项目申报项目

 

将XML转换为新格式的XML、excel、和SQlserver数据

 

package hander;

/**
 * @author ZTX * @function:读取XML中字段到excel中
 * @Configuration:将文件放在 Xml\data下,或者data改为您的数据文件夹置于 Xml\下
 * @time:2013年11月12日19:37:45--2013年11月12日21:48:22--2013年11月13日17:28:45--2013年11月13日21:00:45
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.dom4j.*;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

import org.apache.poi.*;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;

public class Read { 
	public static String inputXmlDir;//要处理的xml文件的文件夹
	public static String data="2006";//要处理的xml文件的文件夹名称默认是data
	public static boolean head=false;//表头是否写了
	public static String pathPrefix="";//文件路径前缀
	
	public static long  recordNum=0;//记录批量
	public static void main(String args[]){
		PreparedStatement ppst = null ;
		ConnectSQLserver mssq=new ConnectSQLserver(true);//连sqlserver
		//建表
		String reCreWord=
				"create table Project"+
				"     "+
				"(                        "+
				"AwardTitle varchar(200) null, "+
				"Time varchar(1000) null, "+
				"Organization varchar(200) null, "+
				"ProgramOfficer varchar(100) null, "+
				"AbstractNarration varchar(8000) null, "+
				"AwardID varchar(10) null, "+
				"PrincipalInvestigator varchar(60) null, "+
				"EmailAddress varchar(40) null, "+
				"Name varchar(80) null, "+
				"CountryName varchar(60) null, "+
				"StateName varchar(60) null, "+
				"CityName varchar(60) null, "+
				"ProgramReference varchar(500) null, "+
				")                        ";
		//插入
		String inSQL="insert into Project(" +
				"AwardTitle,Time,Organization," +
				"ProgramOfficer,AbstractNarration," +
				"AwardID,PrincipalInvestigator,EmailAddress," +
				"Name,CountryName,StateName," +
				"CityName,ProgramReference)values(?,?,?,?,?,?,?,?,?,?,?,?,?);";

		try {
			 mssq.sta.execute(reCreWord);
			System.out.println("操作新表:"+inSQL);
			ppst=mssq.conn.prepareStatement(inSQL);
		} catch (SQLException e1) {
			e1.printStackTrace();
		}
//		try {
//			//mssq.conn.setAutoCommit(false);
//		} catch (SQLException e1) {
//			e1.printStackTrace();
//		}
		
		long start,end=0;
		//创建空表
		Document creDocument = DocumentHelper.createDocument();    
		//添加空字段
		//项目标题
		Element root= creDocument.addElement("root");
		Element element[ ] = new Element[13];
		element[0] =  root.addElement("AwardTitle"); 
		element[1] = root.addElement("Time"); 
		element[2]=  root.addElement("Organization"); 
		element[3] =  root.addElement("ProgramOfficer"); 
		element[4]=  root.addElement("AbstractNarration"); 
		element[5]=  root.addElement("AwardID"); 
		element[6]=  root.addElement("PrincipalInvestigator"); 
		element[7]=  root.addElement("EmailAddress"); 
		element[8] = root.addElement("Name"); 
		element[9]=  root.addElement("CountryName"); 
		element[10] = root.addElement("StateName"); 
		element[11]=  root.addElement("CityName"); 
		element[12]=  root.addElement("ProgramReference");
		
        SAXReader saxReader = new SAXReader();   
		//得到待处理的文件夹
			//前缀
        File getPrefix = new File("");// 空文件夹 用来得到 文件路径前缀
        pathPrefix=getPrefix.getAbsolutePath();//得到 文件路径前缀
        	//组合
        inputXmlDir=File.separator + data;
        inputXmlDir=pathPrefix+inputXmlDir;//文件夹绝对路径
        System.out.println(inputXmlDir);
        File file = new File(data);
        //遍历文件夹下的每个文件
	    if(file.isDirectory()){ 
	        File[] files = file.listFiles();
	        start=System.currentTimeMillis();
	        Document document;
	        for(int i = 0; i < files.length; i++){
	        	 System.out.println("第"+(i+1)+"篇");
	            try {
	            	document = saxReader.read(inputXmlDir+File.separator + files[i].getName());
					//拿出一个xml去处理
	        			try {
							String record[]=Handle( document,creDocument ,element);
							//System.out.println("record[0]:"+record[0].toString());
 							for( int k=0;k<record.length;k++ ){
 								 ppst.setString(k+1, record[k]);
 								//System.out.println(k);
 								// System.out.println(record[k]);
 							}
							ppst.execute();
	        			} catch (SQLException e) {
							e.printStackTrace();
						}
	            } catch (DocumentException e) {
	            	e.printStackTrace();
	            }
	        }
	        end=System.currentTimeMillis()-start;
	        System.out.println("所用时间:"+end/1000+"s");
	     }
	    //输出xml结果文件
	    try{
	        XMLWriter output = new XMLWriter(
	            new FileWriter( new File(inputXmlDir+File.separator+"result.xml") ));
	            output.write( creDocument );
	            output.close();
	    }
	    catch(IOException e){
	    	System.out.println(e.getMessage());
	    }
	}
 
	//处理一个xml 
	public static String[] Handle(Document document, Document creDocument, Element[] element) throws SQLException{
		String [] record=new String [13];
		
		//解析
		Element root=document.getRootElement();    // 获取根节点
        Iterator Award = root.elementIterator("Award"); //根节点#Award
        // 遍历Award节点:
        while (Award.hasNext()) {
            Element AwardEle = (Element) Award.next();
            //项目标题
            String AwardTitle = AwardEle.elementTextTrim("AwardTitle"); //  Award#AwardTitle 
              //添加到xml中
            Element Title=element[0].addElement("Title");//AwardTitle的子标签
            Title.setText(AwardTitle);
            record[0]=AwardTitle;
            //项目时间
            String Time="";
            String AwardEffectiveDate=AwardEle.elementTextTrim("AwardEffectiveDate"); //  Award#AwardEffectiveDate 
            String AwardExpirationDate=AwardEle.elementTextTrim("AwardExpirationDate"); //  Award#AwardExpirationDate 
            Time=AwardEffectiveDate +"-"+AwardExpirationDate;
            
            Element time=element[1].addElement("time"); 
            time.setText(Time);
            record[1]=Time;
            //组织
            String org="";
            Iterator Organization = AwardEle.elementIterator("Organization");//Award#Organization
            	// 遍历Organization节点:
            while (Organization.hasNext()) {
                Element OrganizationEle = (Element) Organization.next();
                //组织合成
                org= getChild(OrganizationEle,"Division","LongName")+">"+getChild(OrganizationEle,"Directorate","LongName");
            }
            
            Element organization=element[2].addElement("organization"); 
            organization.setText(org);
            record[2]=org;
           //项目主任
           String  proOffice="";
           proOffice=getChild(AwardEle,"ProgramOfficer","SignBlockName");
           
           Element officers=element[3].addElement("officers"); 
           officers.setText(proOffice);
           record[3]=proOffice;
           //摘要叙述
           String  abs="";
           abs=getChild(AwardEle,"","AbstractNarration");
           
           Element abstracts=element[4].addElement("abstracts"); 
           abstracts.setText(abs);
           record[4]=abs;
           //项目编号
           String  Id="";
           Id=getChild(AwardEle,"","AwardID");
           
           Element id=element[5].addElement("id"); 
           id.setText(Id);
           record[5]=Id;
           //主要研究者
           String  Inv="";
           Inv=getChild(AwardEle,"Investigator","FirstName")+" "+getChild(AwardEle,"Investigator","LastName");
           if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
               Element investigator=element[6].addElement("investigator"); 
               investigator.setText(Inv);
               record[6]=Inv;
           }
           //主要研究者邮箱
           String  Em="";
           Em=getChild(AwardEle,"Investigator","EmailAddress");
           if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
        	   Element email=element[7].addElement("email"); 
               email.setText(Em);
               record[7]=Em;
           }
           //机构名称
           String  Name="";
           Name=getChild(AwardEle,"Institution","Name");
           
           Element name=element[8].addElement("name"); 
           name.setText(Name);
           record[8]=Name;
           //机构国家
           String  Nation="";
           Nation=getChild(AwardEle,"Institution","CountryName");
          
           Element nation=element[9].addElement("nation"); 
           nation.setText(Nation);
           record[9]=Nation;
           //机构州
           String  State="";
           State=getChild(AwardEle,"Institution","StateName");
          
           Element state=element[10].addElement("state"); 
           state.setText(State);
           record[10]=State;
           //机构城市
           String  City="";
           City=getChild(AwardEle,"Institution","CityName");
          
           Element city=element[11].addElement("city"); 
           city.setText(City);
           record[11]=City;
           //研究主题
           String  Ref="";
          //(2)子节点的子元素
   		   Iterator iters = AwardEle.elementIterator("ProgramReference");
   		  //遍历mom父节点
   		   while (iters.hasNext()){
   		       Element itemEle = (Element) iters.next();
   		    Ref+=itemEle.elementTextTrim("Text")+" ;"; // 拿到head下的子节点script下的字节点username的值
   		   }
   		 Element reference=element[12].addElement("reference"); 
   		 reference.setText(Ref);
   		record[12]=Ref;
        } 
       // ppst.addBatch();
       // ppst.executeBatch();
       // ppst.execute();
		recordNum++;
        if(!head){//写表头
        	String heads[]=new String [13];
        	heads[0]="项目标题";
        	heads[1]="项目时间";
        	heads[2]="项目主任";
        	heads[3]="组织";
        	heads[4]="摘要叙述";
        	heads[5]="项目编号";
        	heads[6]="主要研究者";
        	heads[7]="主要研究者邮箱";
        	heads[8]="机构名称";
        	heads[9]="机构国家";
        	heads[10]="机构州";
        	heads[11]="机构城市";
        	heads[12]="研究主题";
        	//writeResult2Excel(heads);
        	head=true;
        }
       
        //writeResult2Excel(record);//数据量大就没用
        return record;
	}
	//写入excel一行
	public static void writeResult2Excel(String [] record) {
		String DB=pathPrefix+File.separator+"data/results.xls";	
		HSSFWorkbook wb = getDBwb(DB);//据已有的DB 建立新HSSFWorkbook对象  
		HSSFSheet sheet = wb.getSheet("sheet1");//得到新的sheet对象	
		int rowNum=sheet.getLastRowNum();//获得总行数
		int coloumNum=13;//总列数
		 HSSFRow rowTo =null;
		 HSSFCell cellTo =null;
			 rowTo = sheet.createRow(rowNum+1);
			 for(int j=0;j<coloumNum;j++){//列循环
				 cellTo=rowTo.createCell(j);
				 cellTo.setCellValue(record[j]);
			 } 
		//要把结果写到文件中去
		writeBack2Excel(DB,wb);
	}//writeBack
	public static HSSFWorkbook getDBwb(String getDB) {
		HSSFWorkbook workbook=null;
		try {
			  FileInputStream fis=new FileInputStream(getDB);
		       workbook=new HSSFWorkbook(fis); 
		} catch (IOException e) {
			e.printStackTrace();
		}
		return workbook;
	}
	public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
		 FileOutputStream fileOut;
		try {
			fileOut = new FileOutputStream(DBurl);		
			wb.write(fileOut);//把Workbook对象输出到文件中  
			fileOut.flush();              
			fileOut.close(); 
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}  
	}
	private static String getChild(Element AwardEle,String mom,String kid) {
		String target="";
		//(1)直接输出的是根节点的子元素
		if(mom.equals("")){
		    target= AwardEle.elementTextTrim(kid); 
		}
		//(2)子节点的子元素
		Iterator iters = AwardEle.elementIterator(mom);
		//遍历mom父节点
		   while (iters.hasNext()){
		       Element itemEle = (Element) iters.next();
		       target= itemEle.elementTextTrim(kid); // 拿到head下的子节点script下的字节点username的值
		   }
		return target;
	}
}


 

 

posted @ 2013-11-14 21:20  pangbangb  阅读(560)  评论(0)    收藏  举报