不归家的夜

导航

JAVA多线程超时加载单网页图片

先上图:

这一次没有采用正则匹配,而是采用了最简单的Java字符串分割和替换方法来筛选图片

它能够筛选如下的图片并保存到指定的文件夹

如:

"http://xxxx/xxxx/xxx.jpg"

'http://xxxx/xxxx/xxx.jpg'

如果中间的分隔符为\/而不是/,可进行替换

如将http:\/\/xxxxx\/xxx\/xxx.jpg替换为http://xxxxx/xxx/xxx.jpg

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;

/**
 * Downloads the images referenced by a single web page.
 * <p>
 * The page is fetched as text and split on double and single quotes; every
 * quoted fragment that starts with {@code http} and ends with one of the
 * configured image suffixes is treated as an image URL. JSON-style escaped
 * slashes ({@code \/}) are un-escaped and the images are then downloaded by a
 * small number of worker threads.
 *
 * @author ITWANG
 */
public class CatchImage
{

	// Address of the page to scan for images.
	private static final String PAGE_URL = "http://image.baidu.com/channel?c=%E7%BE%8E%E5%A5%B3&t=%E5%B0%8F%E6%B8%85%E6%96%B0&s=0";
	// Charset used to decode the downloaded page.
	private static final String ENCODING = "UTF-8";
	// Image file suffixes to look for (matched case-sensitively).
	private static final String[] PIC_SUFFIXES = { "jpg", "JPG", "gif", "GIF", "png", "PNG" };
	// Maximum number of download threads to start.
	private static final int THREAD_COUNT = 3;
	// Connect/read timeout for each image download, in milliseconds.
	private static final int TIMEOUT_MS = 4000;

	public static void main(String[] args) throws Exception
	{
		CatchImage cm = new CatchImage();
		// Fetch the page as text.
		String html = cm.getHTML(PAGE_URL);
		List<String> imgSrc = cm.getttr(html, PIC_SUFFIXES);
		List<String> pList = cm.picFilter(imgSrc);
		cm.TOThreadDownload(pList, "E:\\Imagesave" + saveDiff(), THREAD_COUNT, TIMEOUT_MS);
	}

	/**
	 * Un-escapes slashes, turning {@code http:\/\/host\/a.jpg} into
	 * {@code http://host/a.jpg}.
	 *
	 * @param picurl
	 *            image URLs, possibly containing {@code \/} sequences
	 * @return a new list with every {@code \/} replaced by {@code /}
	 */
	public List<String> picFilter(List<String> picurl)
	{
		List<String> list = new ArrayList<>(picurl.size());
		for (String string : picurl)
		{
			list.add(string.replace("\\/", "/"));
		}
		return list;
	}

	/**
	 * Extracts the image URLs of a single page.
	 * <p>
	 * The source is split on {@code "} and then on {@code '}; a fragment is kept
	 * when it starts with {@code http} and ends with one of the suffixes. Each
	 * distinct URL is returned once, in order of first appearance, so an image
	 * referenced several times is not downloaded several times.
	 *
	 * @param htmlsource
	 *            page source as a string
	 * @param picstuffix
	 *            accepted suffixes (case-sensitive)
	 * @return the image URLs found; empty when either argument is {@code null}
	 */
	public List<String> getttr(String htmlsource, String[] picstuffix)
	{
		if (htmlsource == null || picstuffix == null)
		{
			return new ArrayList<>();
		}
		// LinkedHashSet removes duplicates while keeping discovery order.
		Set<String> found = new LinkedHashSet<>();

		String[] htmlarray1 = htmlsource.split("\"");
		System.out.println("双引号分割:" + htmlarray1.length);
		collectImageUrls(htmlarray1, picstuffix, found);

		String[] htmlarray2 = htmlsource.split("'");
		System.out.println("单引号分割:" + htmlarray2.length);
		collectImageUrls(htmlarray2, picstuffix, found);

		List<String> listpic = new ArrayList<>(found);
		System.out.println(listpic.size());
		for (String string : listpic)
		{
			System.out.println(string);
		}
		return listpic;
	}

	/** Adds to {@code found} every fragment that starts with "http" and ends with one of the suffixes. */
	private static void collectImageUrls(String[] fragments, String[] suffixes, Set<String> found)
	{
		for (String fragment : fragments)
		{
			if (!fragment.startsWith("http"))
			{
				continue;
			}
			for (String suffix : suffixes)
			{
				if (suffix != null && fragment.endsWith(suffix))
				{
					found.add(fragment);
					break;
				}
			}
		}
	}

	/**
	 * Downloads a page and returns its content as a string.
	 *
	 * @param url
	 *            page address
	 * @return the page content decoded with {@link #ENCODING}
	 * @throws Exception
	 *             if the URL is malformed or the page cannot be read
	 */
	private String getHTML(String url) throws Exception
	{
		URLConnection connection = new URL(url).openConnection();
		ByteArrayOutputStream page = new ByteArrayOutputStream();
		try (InputStream in = connection.getInputStream())
		{
			byte[] buf = new byte[1024];
			int length;
			while ((length = in.read(buf)) != -1)
			{
				// Keep only the bytes actually read in this pass.
				page.write(buf, 0, length);
			}
		}
		// Decode once at the end so a multi-byte character that straddles two
		// reads is not corrupted.
		return page.toString(ENCODING);
	}

	/**
	 * Downloads all images using at most {@code tnum} threads.
	 * <p>
	 * The list is cut into contiguous chunks of equal size (the last one may be
	 * shorter) and one thread is started per chunk. The method returns as soon
	 * as the threads are started; it does not wait for them to finish.
	 *
	 * @param listImgSrc
	 *            image URLs
	 * @param savedir
	 *            target folder
	 * @param tnum
	 *            maximum number of threads; values below 1 are treated as 1
	 * @param timeout
	 *            per-download timeout in milliseconds
	 */
	private void TOThreadDownload(List<String> listImgSrc, String savedir, int tnum, int timeout)
	{
		int total = listImgSrc.size();
		if (total == 0)
		{
			return;
		}
		int workers = Math.min(Math.max(1, tnum), total);
		// Ceiling division, so that `workers` chunks cover the whole list.
		int chunk = (total + workers - 1) / workers;
		for (int start = 0; start < total; start += chunk)
		{
			new TODThread(savedir, chunk, listImgSrc, start, timeout).start();
		}
	}

	/**
	 * Worker thread that downloads a contiguous slice of the URL list, one
	 * image after another, each with a timeout.
	 *
	 * @author ITWANG
	 */
	class TODThread extends Thread
	{
		private final String savedir;
		private final int tnum;
		private final List<String> listImgSrc;
		private final int bnum;
		private final int timeout;

		/**
		 * @param savedir
		 *            target folder
		 * @param tnum
		 *            maximum number of URLs this thread handles
		 * @param listImgSrc
		 *            full URL list
		 * @param bnum
		 *            index of the first URL this thread handles
		 * @param timeout
		 *            per-download timeout in milliseconds
		 */
		public TODThread(String savedir, int tnum, List<String> listImgSrc, int bnum, int timeout)
		{
			this.savedir = savedir;
			this.tnum = tnum;
			this.listImgSrc = listImgSrc;
			this.bnum = bnum;
			this.timeout = timeout;
		}

		@Override
		public void run()
		{
			// The last slice may be shorter than tnum; never read past the end.
			int end = Math.min(listImgSrc.size(), bnum + tnum);
			for (int i = bnum; i < end; i++)
			{
				try
				{
					String url = listImgSrc.get(i);
					// A random name avoids collisions between images that share
					// a file name on different hosts.
					String imageName = UUID.randomUUID().toString() + extensionOf(url);
					if (getPic(url, savedir, imageName, timeout))
					{
						System.out.println("*^_^*");
					} else
					{
						System.out.println("-_-!");
					}
				} catch (Exception e)
				{
					// One failed image must not stop the rest of the slice.
					System.out.println("下载异常:" + e);
				}
			}
		}
	}

	/** Returns the file extension of {@code url} including the dot, or "" when its last path segment has none. */
	private static String extensionOf(String url)
	{
		int dot = url.lastIndexOf('.');
		if (dot < 0 || url.indexOf('/', dot) >= 0 || url.indexOf('\\', dot) >= 0)
		{
			return "";
		}
		return url.substring(dot);
	}

	/**
	 * Downloads one image with an HTTP GET request and stores it on disk.
	 *
	 * @param purl
	 *            image URL (must be http or https)
	 * @param folder
	 *            target folder; created if it does not exist
	 * @param filename
	 *            name of the file to write inside {@code folder}
	 * @param timeout
	 *            connect and read timeout in milliseconds
	 * @return {@code true} if the server answered 200 and the file was written,
	 *         {@code false} for any other response code
	 * @throws Exception
	 *             if the URL is malformed, the connection fails or times out,
	 *             or the file cannot be written
	 */
	public boolean getPic(String purl, String folder, String filename, int timeout) throws Exception
	{
		URL url = new URL(purl);
		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
		try
		{
			conn.setConnectTimeout(timeout);
			// Without a read timeout a stalled transfer would block forever.
			conn.setReadTimeout(timeout);
			conn.setRequestMethod("GET");
			conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30");
			if (conn.getResponseCode() != HttpURLConnection.HTTP_OK)
			{
				System.out.println("失败:" + url);
				return false;
			}
			File sf = new File(folder);
			if (!sf.exists())
			{
				// Another thread may create the folder concurrently; a real
				// failure surfaces when the output file is opened below.
				sf.mkdirs();
			}
			try (InputStream is = conn.getInputStream();
					OutputStream os = new FileOutputStream(new File(sf, filename)))
			{
				byte[] bs = new byte[1024];
				int len;
				while ((len = is.read(bs)) != -1)
				{
					os.write(bs, 0, len);
				}
			}
			System.out.println("成功:" + url);
			return true;
		} finally
		{
			conn.disconnect();
		}
	}

	/**
	 * Builds a time-stamped sub-folder name.
	 *
	 * @return the current time formatted as {@code yyyy-MM-dd-HH-mm-ss},
	 *         enclosed in backslashes
	 */
	public static String saveDiff()
	{
		SimpleDateFormat formate = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
		return "\\" + formate.format(System.currentTimeMillis()) + "\\";
	}

}

  

posted on 2014-05-14 17:13  不归家的夜  阅读(757)  评论(0)    收藏  举报