旧瓶新酒-获取网络资源即爬取下载页面内容(图片、html、css、js等)
这个java获取网络资源以前也写过不少
最近用到又重新写了一个,apache.commons.io中的例子就非常好,但是无法对请求进行详细设置
于是大部分照搬,局部替换以设置请求头
如需更加复杂的设置,可以考虑使用同为Apache的HttpComponents
*
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Demonstrates several ways of fetching a network resource (image, HTML, CSS, JS, ...).
 *
 * <ul>
 *   <li>{@code apacheCommonsIoDownload} - delegates to {@code FileUtils.copyURLToFile};
 *       shortest, but the request cannot be customised.</li>
 *   <li>{@code pureJavaNetDownload} - JDK classes only; request headers and timeouts
 *       are set on an {@link HttpURLConnection}.</li>
 *   <li>{@code mixedDownload} - customised {@link HttpURLConnection} request combined
 *       with commons-io stream helpers (the variant the original author recommends).</li>
 *   <li>{@code getContentAsString} - reads a resource into memory and prints it.</li>
 * </ul>
 *
 * Created by wq on 2017/6/6.
 */
public class Download {

    /** User-Agent sent with customised requests; some sites answer 403 to clients without one. */
    private static final String USER_AGENT = "Mozilla/4.0";

    /** Connect and read timeout, in milliseconds, for customised requests. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Size in bytes of the copy buffer used by the JDK-only download. */
    private static final int BUFFER_SIZE = 1024 * 4;

    /**
     * Runs each download variant once against sample URLs. A failure in one variant is
     * printed and does not prevent the remaining variants from running.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Download download = new Download();
        String url = "http://img1.3lian.com/2015/w7/85/d/21.jpg";
        // The backslash must be escaped: "\1" in a Java string literal is an octal escape
        // (the control character U+0001), not a backslash followed by the digit 1.
        String path1 = "E:\\1.jpg";
        String path2 = "E:\\2.jpg";
        String path3 = "E:\\3.jpg";
        String url2 = "http://www.baidu.com";
        try {
            download.apacheCommonsIoDownload(url, path1);
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            download.pureJavaNetDownload(url, path2);
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            download.mixedDownload(url, path3);
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            download.getContentAsString(url2);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads {@code urlstr} to the file at {@code path} using commons-io only.
     *
     * @param urlstr URL of the resource
     * @param path   destination file path
     * @throws Exception if the URL is malformed or the transfer fails
     */
    private void apacheCommonsIoDownload(String urlstr, String path) throws Exception {
        apacheCommonsIoDownload(urlstr, new File(path));
    }

    /**
     * Downloads {@code urlstr} to {@code file} via {@code FileUtils.copyURLToFile}.
     *
     * @param urlstr URL of the resource
     * @param file   destination file
     * @throws Exception if the URL is malformed or the transfer fails
     */
    private void apacheCommonsIoDownload(String urlstr, File file) throws Exception {
        FileUtils.copyURLToFile(new URL(urlstr), file);
    }

    /**
     * Downloads {@code urlstr} to the file at {@code path} using JDK classes only.
     *
     * @param urlstr URL of the resource
     * @param path   destination file path
     * @throws Exception if the URL is malformed or the transfer fails
     */
    private void pureJavaNetDownload(String urlstr, String path) throws Exception {
        pureJavaNetDownload(urlstr, new File(path));
    }

    /**
     * Downloads {@code urlstr} to {@code file} without any third-party dependency.
     *
     * <p>When no request customisation is needed, {@code new URL(urlstr).openStream()}
     * can replace the configured connection used here.
     *
     * @param urlstr URL of the resource
     * @param file   destination file
     * @throws Exception if the URL is malformed, the connection fails, or the copy fails
     */
    private void pureJavaNetDownload(String urlstr, File file) throws Exception {
        InputStream is = openConnection(urlstr).getInputStream();
        try {
            FileOutputStream fos = new FileOutputStream(file);
            try {
                // Hand-rolled equivalent of IOUtils.copy(InputStream, OutputStream).
                byte[] buffer = new byte[BUFFER_SIZE];
                int n;
                while (-1 != (n = is.read(buffer))) {
                    fos.write(buffer, 0, n);
                }
            } finally {
                closeQuietly(fos);
            }
        } finally {
            closeQuietly(is);
        }
    }

    /**
     * Downloads {@code urlstr} to the file at {@code path} with a customised request
     * and commons-io stream helpers.
     *
     * @param urlstr URL of the resource
     * @param path   destination file path
     * @throws Exception if the URL is malformed or the transfer fails
     */
    private void mixedDownload(String urlstr, String path) throws Exception {
        mixedDownload(urlstr, new File(path));
    }

    /**
     * Downloads {@code urlstr} to {@code file}. The request is configured on an
     * {@link HttpURLConnection} (instead of going through {@code FileUtils.copyURLToFile})
     * so headers and timeouts can be set, while {@code IOUtils} keeps the copy code short.
     *
     * @param urlstr URL of the resource
     * @param file   destination file
     * @throws Exception if the URL is malformed, the connection fails, or the copy fails
     */
    private void mixedDownload(String urlstr, File file) throws Exception {
        InputStream is = openConnection(urlstr).getInputStream();
        try {
            FileOutputStream output = FileUtils.openOutputStream(file);
            try {
                IOUtils.copy(is, output);
            } finally {
                IOUtils.closeQuietly(output);
            }
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Reads the resource at {@code urlstr} fully into memory and prints it to standard output.
     *
     * <p>The bytes are decoded as UTF-8 rather than the platform default charset, so the
     * output does not depend on the machine's locale. NOTE(review): this assumes the
     * resource is UTF-8 encoded; the response's declared charset is not inspected.
     *
     * @param urlstr URL of the resource
     * @throws Exception if the URL is malformed or reading fails
     */
    private void getContentAsString(String urlstr) throws Exception {
        InputStream is = new URL(urlstr).openStream();
        try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            IOUtils.copy(is, bos);
            System.out.println(bos.toString("UTF-8"));
        } finally {
            // Release the stream even when the copy fails.
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Opens a GET connection to {@code urlstr} with the User-Agent header and timeouts
     * applied, and connects it. Further request settings can be added here.
     *
     * @param urlstr URL to connect to; the cast below requires an HTTP(S) URL
     * @return the connected connection
     * @throws IOException if the URL is malformed or the connection cannot be established
     */
    private static HttpURLConnection openConnection(String urlstr) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(urlstr).openConnection();
        connection.setRequestMethod("GET");
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        connection.setReadTimeout(TIMEOUT_MILLIS);
        connection.connect();
        return connection;
    }

    /**
     * Closes {@code closeable}, ignoring {@code null} and any {@link IOException} from
     * {@code close()}, so that a failure while closing does not mask an exception thrown
     * by the copy itself.
     *
     * @param closeable resource to close; may be {@code null}
     */
    private static void closeQuietly(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best-effort close, matching the original behaviour.
        }
    }
}
能用注解的,尽量不用xml,看着xml就烦!!!
浙公网安备 33010602011771号