利用httpclient和mysql模拟搜索引擎

数据抓取模块

package crowling1;


import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;

/**
 * An example that performs GETs from multiple threads.
 *
 */
public class ClientMultiThreadedExecution {

    public static void main(String[] args) throws Exception {
        // Create an HttpClient with the ThreadSafeClientConnManager.
        // This connection manager must be used if more than one thread will
        // be using the HttpClient.
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        cm.setMaxTotal(1000);

        CloseableHttpClient httpclient = HttpClients.custom()
                .setConnectionManager(cm)
                .build();
            GetThread[] threads = new GetThread[5000];
            for (int i = 0; i < threads.length; i++) {
                HttpGet httpget = new HttpGet(reportIP());
                threads[i] = new GetThread(httpclient, httpget, i + 1);
            }

            // start the threads
            for (int j = 0; j < threads.length; j++) {
                threads[j].start();
            }

            // join the threads
            for (int j = 0; j < threads.length; j++) {
                threads[j].join();
            }

        } finally {
            httpclient.close();
        }
    }


    /**
     * A thread that performs a GET.
     */
    static class GetThread extends Thread {

        private final CloseableHttpClient httpClient;
        private final HttpContext context;
        private final HttpGet httpget;
        private final int id;

        public GetThread(CloseableHttpClient httpClient, HttpGet httpget, int id) {
            this.httpClient = httpClient;
            this.context = new BasicHttpContext();
            this.httpget = httpget;
            this.id = id;
        }

        /**
         * Executes the GetMethod and prints some status information.
         */
        @Override
        public void run() {
            try {
                System.out.println(id + " - about to get something from " + httpget.getURI());
                CloseableHttpResponse response = httpClient.execute(httpget, context);
                try {
                    System.out.println(id + " - get executed");
                    // get the response body as an array of bytes
                    HttpEntity entity = response.getEntity();
                    String str = null;
                    if (entity != null) {
                        byte[] bytes = EntityUtils.toByteArray(entity);
                        str=new String(bytes,"utf-8");
                        System.out.println(id + " - " + bytes.length + " bytes read");
                    }
                    demo3 d=new demo3();
                    String mys="'"+httpget.getURI()+"'";
                    String ip=mys;
                    int begin=str.indexOf("<title>")+7;
                    int end=str.indexOf("</title>");
                    int debegin=str.indexOf("Description");
                    String title="";
                    if (begin!=-1){
                    title="'"+str.substring(begin, end)+"'";
                    }
                    String desc=null;
                    if (debegin!=-1){
                    desc="'"+str.substring(debegin, debegin+10)+"'";
                    }else {
                        desc="'没有获取到描述'";
                    }
                    System.out.println(title);
                    d.createconn();
                    String sql="insert into web values("+ip+","+title+","+desc+")";
                    d.savedata(sql);
                } finally {
                    response.close();
                }
            } catch (Exception e) {
                System.out.println(id + " - error: " + e);
            }
        }

    }
    static int a=110;
    static int b=75;
    static int c=114;
    static int d=0;
    public synchronized static String reportIP(){
        if (d==255){
            d=0;
            c++;
        }else if(b==255){
            b=0;
            a++;
        }else if(c==255){
            c=0;
            b++;
        }else {
            d++;
        }
        return new String("http://"+a+"."+b+"."+c+"."+d);

    }

}

数据存储模块

package crowling1;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

/**
 * Minimal JDBC helper that opens a connection to the local {@code webcro}
 * MySQL database and executes update statements on it.
 */
public class demo3 {
    /** Connection opened by {@link #createconn()}; stays {@code null} if connecting failed. */
    public Connection con;

    /**
     * Loads the MySQL driver and opens the connection into {@link #con}.
     * Failures are printed rather than thrown, in which case {@code con}
     * is left unchanged.
     */
    public void createconn(){
        try {
            // Load the MySQL driver class
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            System.out.println("找不到驱动程序类 ,加载驱动失败!");
            e.printStackTrace();
        }
        // NOTE(review): URL and credentials are hard-coded; consider moving
        // them to configuration.
        String url = "jdbc:mysql://localhost:3306/webcro";
        String username = "root";
        String password = "root";
        try {
            // Connect
            con = DriverManager.getConnection(url, username, password);
        } catch (SQLException se) {
            System.out.println("数据库连接失败!");
            se.printStackTrace();
        }
    }

    /**
     * Executes a complete update statement (for example an INSERT).
     *
     * @param sql the statement text, containing no {@code ?} placeholders
     * @throws SQLException if there is no open connection or execution fails
     */
    public void savedata(String sql) throws SQLException{
        savedata(sql, new Object[0]);
    }

    /**
     * Executes an update statement after binding {@code params} to its
     * {@code ?} placeholders in order. Prefer this overload whenever a value
     * comes from outside the program, so it is never spliced into the SQL text.
     *
     * @param sql    the statement text with one {@code ?} per parameter
     * @param params values to bind, in placeholder order
     * @throws SQLException if there is no open connection or execution fails
     */
    public void savedata(String sql, Object... params) throws SQLException {
        if (con == null) {
            // Previously this surfaced as a NullPointerException.
            throw new SQLException("no database connection; call createconn() first");
        }
        PreparedStatement pstmt = con.prepareStatement(sql);
        try {
            for (int i = 0; i < params.length; i++) {
                pstmt.setObject(i + 1, params[i]); // JDBC indexes start at 1
            }
            pstmt.executeUpdate();
        } finally {
            // Release the statement even when execution fails.
            pstmt.close();
        }
    }

    /**
     * Closes the connection opened by {@link #createconn()}, if any, and
     * resets {@link #con} to {@code null}. Safe to call more than once.
     */
    public void closeconn() {
        if (con == null) {
            return;
        }
        try {
            con.close();
        } catch (SQLException se) {
            System.out.println("failed to close database connection: " + se);
        } finally {
            con = null;
        }
    }

    /** Inserts one sample row to check that the connection and table work. */
    public static void main(String[] args) throws SQLException {
        demo3 d = new demo3();
        d.createconn();
        try {
            d.savedata("insert into web values(?,?,?)", "255.255.255.253", "百度", "百度");
        } finally {
            d.closeconn();
        }
    }
}

posted on 2015-05-28 13:59  MrCharles在cnblogs  阅读(108)  评论(0)    收藏  举报

导航