java爬取猫咪上的图片

首先是对知识点归纳

1.先获取网页源代码并分析其中的图片地址,发现图片地址是按编号顺序排列的,因此可以用循环逐个拼出地址并下载

2.保存图片要用到流操作和文件操作,对两部分知识进行了复习巩固

3.保存下来的图片底部带有一段广告文字,所以我又学习了如何裁剪图片

下面上代码:

网页源代码不粘贴了

对网页源代码分析之后进行循环提取图片链接并下载:

  1 package 文件操作;
  2 
  3 import java.io.ByteArrayOutputStream;
  4 import java.io.File;
  5 import java.io.FileOutputStream;
  6 import java.io.IOException;
  7 import java.io.InputStream;
  8 import java.net.HttpURLConnection;
  9 import java.net.MalformedURLException;
 10 import java.net.URL;
 11 
 12 public class Pic696 {
 13 
 14     public static void main(String[] args) {
 15     for(int i =1;i<72;i++) {
 16         for (int j =1; j < 50; j++) {
 17 
 18             if(i<10&&j<10) {
 19                 String strUrl ="https://mmslt1.com/tp/girl/FEILIN/A-00"+i+"/0"+j+".jpg";
 20 //                System.out.println(i+" "+j);
 21                 System.out.println(strUrl);
 22                 URL url;
 23                 try {
 24                     url = new URL(strUrl);
 25                     try {
 26                         Pig(url, i, j);
 27                     } catch (IOException e) {
 28                         System.out.println("404");
 29                     }
 30                 } catch (MalformedURLException e) {
 31                     
 32                 }
 33                 
 34             }
 35             if(i<10&&j>=10) {
 36                 String strUrl ="https://mmslt1.com/tp/girl/FEILIN/A-00"+i+"/"+j+".jpg";
 37 //                System.out.println(i+" "+j);
 38                 System.out.println(strUrl);
 39                 URL url;
 40                 try {
 41                     url = new URL(strUrl);
 42                     try {
 43                         Pig(url, i, j);
 44                     } catch (IOException e) {
 45                         System.out.println("404");
 46                     }
 47                 } catch (MalformedURLException e) {
 48                     // TODO Auto-generated catch block
 49                     e.printStackTrace();
 50                 }
 51                 
 52             }
 53             if(i>=10&&j<10) {
 54                 String strUrl ="https://mmslt1.com/tp/girl/FEILIN/A-0"+i+"/0"+j+".jpg";
 55 //                System.out.println(i+" "+j);
 56                 System.out.println(strUrl);
 57                 URL url;
 58                 try {
 59                     url = new URL(strUrl);
 60                     try {
 61                     Pig(url, i, j);
 62                 } catch (IOException e) {
 63                     System.out.println("404");
 64                 }
 65                 } catch (MalformedURLException e) {
 66                     // TODO Auto-generated catch block
 67                     e.printStackTrace();
 68                 }
 69                 
 70             }
 71             if(i>=10&&j>=10) {
 72             String strUrl ="https://mmslt1.com/tp/girl/FEILIN/A-0"+i+"/"+j+".jpg";
 73 //            System.out.println(i+" "+j);
 74             System.out.println(strUrl);
 75             URL url;
 76             try {
 77                 url = new URL(strUrl);
 78                 try {
 79                 Pig(url, i, j);
 80             } catch (IOException e) {
 81                 System.out.println("404");
 82             }
 83             } catch (MalformedURLException e) {
 84                 // TODO Auto-generated catch block
 85                 e.printStackTrace();
 86             }
 87             
 88             }
 89         }
 90     }
 91             
 92 }
 93     
 94     static void Pig(URL urlStr,int i,int j) throws IOException {
 95         //构造连接
 96         HttpURLConnection conn = (HttpURLConnection)urlStr.openConnection();
 97         conn.setRequestMethod("GET");
 98         //这个网站要模拟浏览器才行
 99         conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0");
100         //打开连接
101         conn.connect();
102 
103         //打开这个网站的输入流
104         InputStream inStream = conn.getInputStream();
105         //用这个做中转站 ,把图片数据都放在了这里,再调用toByteArray()即可获得数据的byte数组
106         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
107         //用这个是很好的,不用一次就把图片读到了文件中
108         //要是需要把图片用作其他用途呢?所以直接把图片的数据弄成一个变量,十分有用
109         //相当于操作这个变量就能操作图片了
110 
111         byte [] buf = new byte[1024];
112         //为什么是1024?
113         //1024Byte=1KB,分配1KB的缓存
114         
115         int len = 0;
116 
117         //读取图片数据
118         while((len=inStream.read(buf))!=-1){
119 //        System.out.println(len);
120         outStream.write(buf,0,len);
121         }
122         inStream.close();
123         outStream.close();
124 
125         //把图片数据填入文件中
126 //        File files = new File("E://Pic696");
127 //        files.mkdirs();
128         File file = new File("E://Pic696/Pic"+i+"_"+j+".jpg");
129 
130         FileOutputStream op = new FileOutputStream(file);
131 
132         op.write(outStream.toByteArray());
133 
134         op.close();
135      }    
136 }

对下载下来的图片进行裁剪(先获取图片原来的宽高,然后保持宽度不变,把底部 70 像素裁掉)

 1 package 文件操作;
 2 
 3 import java.awt.Rectangle;
 4 import java.awt.image.BufferedImage;
 5 import java.io.BufferedReader;
 6 import java.io.File;
 7 import java.io.FileInputStream;
 8 import java.io.IOException;
 9 import java.io.InputStream;
10 import java.io.InputStreamReader;
11 import java.net.MalformedURLException;
12 import java.net.URL;
13 import java.util.Iterator;
14 
15 import javax.imageio.ImageIO;
16 import javax.imageio.ImageReadParam;
17 import javax.imageio.ImageReader;
18 import javax.imageio.stream.ImageInputStream;
19 import javax.swing.ImageIcon;
20 
21 
22 
/**
 * Crops previously downloaded JPEG images: each image keeps its full width
 * and loses the bottom 70 pixels; the result is written to a separate folder.
 */
public class cityScore {

    /**
     * Crops every image {@code E:/Pic696/Pic<i>_<j>.jpg} that exists for
     * i in 1..9 and j in 1..39 and writes the result to {@code E:/Pic696/new/}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // NOTE(review): only albums 1..9 and pictures 1..39 are processed here;
        // confirm whether the ranges are meant to be this narrow.
        for (int i = 1; i < 10; i++) {
            for (int j = 1; j < 40; j++) {
                String OldPic = "E:/Pic696/Pic" + i + "_" + j + ".jpg";
                String NewPic = "E:/Pic696/new/Pic" + i + "_" + j + ".jpg";
                if (!new File(OldPic).isFile()) {
                    // Nothing to crop for this index pair.
                    continue;
                }
                ImageIcon imageIcon = new ImageIcon(OldPic);
                int iconWidth = imageIcon.getIconWidth();
                int iconHeight = imageIcon.getIconHeight();
                // Keep the full width, drop the bottom 70 pixels.
                cutImage(OldPic, NewPic, iconWidth, iconHeight - 70);
            }
        }
    }

    /**
     * Crops a JPEG image to the rectangle starting at its top-left corner and
     * writes the cropped image as a JPEG file.
     *
     * <p>This method is best-effort: any failure is printed to standard
     * output and no exception is propagated to the caller.
     *
     * @param srcImageFile path of the JPEG file to crop
     * @param result path of the output JPEG file; missing parent directories are created
     * @param destWidth width of the crop rectangle in pixels; must be positive
     * @param destHeight height of the crop rectangle in pixels; must be positive
     */
    public final static void cutImage(String srcImageFile, String result,
            int destWidth, int destHeight) {
        if (destWidth <= 0 || destHeight <= 0) {
            // Happens when the source is smaller than the strip to remove,
            // or when its size could not be determined (reported as -1).
            System.out.println("skipped " + srcImageFile + ": invalid crop size "
                    + destWidth + "x" + destHeight);
            return;
        }

        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("JPEG");
        if (!readers.hasNext()) {
            System.out.println("no JPEG image reader available");
            return;
        }
        ImageReader reader = readers.next();

        // Both streams are closed explicitly: closing the ImageInputStream
        // does not close the FileInputStream it wraps.
        try (InputStream inputStream = new FileInputStream(srcImageFile);
                ImageInputStream iis = ImageIO.createImageInputStream(inputStream)) {
            reader.setInput(iis, true);
            ImageReadParam param = reader.getDefaultReadParam();
            // Only the requested region is decoded.
            param.setSourceRegion(new Rectangle(0, 0, destWidth, destHeight));
            BufferedImage bi = reader.read(0, param);

            File target = new File(result);
            File parent = target.getParentFile();
            if (parent != null) {
                parent.mkdirs();
                if (!parent.isDirectory()) {
                    System.out.println("cannot create output directory " + parent);
                    return;
                }
            }
            if (!ImageIO.write(bi, "JPEG", target)) {
                System.out.println("no JPEG image writer available for " + result);
            }
        } catch (IOException | RuntimeException e) {
            System.out.println(e);
        } finally {
            // Releases resources held by the reader.
            reader.dispose();
        }
    }
}

 

posted @ 2019-07-29 22:12  嚼着炫迈去追梦  阅读(384)  评论(0编辑  收藏  举报
人家故里