提取网页的图片链接的Java程序
输入网页文件名,和资源列表文件名
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
第二个文件是解析文件
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
1
//AnalizeIMG.java
2
3
//主程序
4
5
import java.io.BufferedReader;
6
import java.io.File;
7
import java.io.FileReader;
8
import java.io.FileWriter;
9
import java.io.IOException;
10
11
12
public class AnalizeIMG {
13
14
public void p(String s)
15
{
16
System.out.println(s);
17
}
18
19
public void analizeFile(String infile,String outfile) throws Exception
20
{
21
File file = new File(infile);
22
if (file == null || !file.exists()) {
23
p("File " + infile + " not exits !");
24
}
25
26
if (!file.canRead()) {
27
p("File " + infile + " can't read !");
28
29
}
30
31
String strLine = null;
32
FileReader frd = new FileReader(infile);
33
BufferedReader bufferedReader = new BufferedReader(frd);
34
try {
35
AnalizeWebParse parse = new AnalizeWebParse();
36
String s = parse.parse(bufferedReader);
37
38
createFile(outfile,s);
39
40
} catch (Exception ex) {
41
throw ex;
42
} finally {
43
frd.close();
44
bufferedReader.close();
45
}
46
}
47
48
private void createFile(String filename, String content) {
49
FileWriter f = null;
50
try {
51
f = new FileWriter(filename);
52
if (f == null || content == null) {
53
return;
54
}
55
56
f.write(content);
57
f.flush();
58
f.close();
59
60
} catch (Exception e) {
61
62
} finally {
63
if (f != null) {
64
try {
65
f.close();
66
} catch (Exception e) {
67
68
}
69
}
70
}
71
}
72
73
public static void main(String arg[])
74
{
75
AnalizeIMG ana = new AnalizeIMG();
76
try{
77
ana.analizeFile("E:\\1.txt","E:\\out.lst");
78
}catch (Exception ex) {
79
ex.printStackTrace();
80
}
81
}
82
}
83
84
//AnalizeIMG.java2

3
//主程序4

5
import java.io.BufferedReader;6
import java.io.File;7
import java.io.FileReader;8
import java.io.FileWriter;9
import java.io.IOException;10

11

12
public class AnalizeIMG {13

14
public void p(String s)15
{16
System.out.println(s);17
}18

19
public void analizeFile(String infile,String outfile) throws Exception20
{21
File file = new File(infile);22
if (file == null || !file.exists()) {23
p("File " + infile + " not exits !");24
}25

26
if (!file.canRead()) {27
p("File " + infile + " can't read !");28

29
}30
31
String strLine = null;32
FileReader frd = new FileReader(infile);33
BufferedReader bufferedReader = new BufferedReader(frd);34
try {35
AnalizeWebParse parse = new AnalizeWebParse();36
String s = parse.parse(bufferedReader);37
38
createFile(outfile,s);39
40
} catch (Exception ex) {41
throw ex;42
} finally {43
frd.close();44
bufferedReader.close();45
}46
}47

48
private void createFile(String filename, String content) {49
FileWriter f = null;50
try {51
f = new FileWriter(filename);52
if (f == null || content == null) {53
return;54
}55

56
f.write(content);57
f.flush();58
f.close();59

60
} catch (Exception e) {61

62
} finally {63
if (f != null) {64
try {65
f.close();66
} catch (Exception e) {67

68
}69
}70
}71
}72

73
public static void main(String arg[])74
{75
AnalizeIMG ana = new AnalizeIMG();76
try{77
ana.analizeFile("E:\\1.txt","E:\\out.lst");78
}catch (Exception ex) {79
ex.printStackTrace();80
}81
}82
}83

84

第二个文件是解析文件
1
//AnalizeWebParse.java
2
3
//网页分析代码,需要用户根据自己需要做适当修改
4
5
import java.io.BufferedReader;
6
import java.io.StringReader;
7
import java.util.regex.Pattern;
8
9
import javax.swing.text.MutableAttributeSet;
10
import javax.swing.text.html.HTML;
11
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
12
import javax.swing.text.html.parser.ParserDelegator;
13
14
/**
 * Page-analysis callback: collects the {@code src} of every {@code <img>} tag
 * whose value is an absolute {@code http://} or {@code https://} URL, one URL
 * per line.
 *
 * <p>The class also tracks whether the parser has entered, and then left, a
 * {@code <div>} whose {@code class} attribute contains {@code "body"} (the
 * {@link #start} and {@link #finished} flags). Those flags are maintained but
 * are not consulted when collecting images; adapt {@link #handleSimpleTag} if
 * the extraction should be limited to that region.
 */
public class AnalizeWebParse extends ParserCallback {

    /** Compiled once: accepts absolute http and https URLs. */
    private static final Pattern ABSOLUTE_HTTP_URL = Pattern.compile("^https?://.+");

    /** Accumulates the collected URLs, each followed by a newline. */
    StringBuffer sb = new StringBuffer();

    /** Set once a {@code <div>} with a class containing "body" has been opened. */
    boolean start = false;

    /** Set by the first {@code </div>} seen after {@link #start} became true. */
    boolean finished = false;

    /**
     * Prints a line to standard output.
     *
     * @param s the text to print
     */
    public void p(String s) {
        System.out.println(s);
    }

    /**
     * Marks the start of the tracked region when a {@code <div>} whose class
     * contains "body" is opened. Ignored once the region has started or ended.
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs, int pos) {
        if (finished || start || tag != HTML.Tag.DIV) {
            return;
        }

        Object cla = attribs.getAttribute(HTML.Attribute.CLASS);
        // instanceof also rejects null, i.e. a div without a class attribute.
        if (cla instanceof String && ((String) cla).indexOf("body") != -1) {
            start = true;
        }
    }

    /**
     * Marks the tracked region as finished on the first closing {@code </div>}
     * after it started. Nested divs are not counted.
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (tag == HTML.Tag.DIV && start && !finished) {
            finished = true;
        }
    }

    /** Text content is not needed for link extraction and is ignored. */
    @Override
    public void handleText(char[] text, int pos) {
    }

    /**
     * Records the {@code src} of an {@code <img>} tag when it is an absolute
     * http or https URL; relative and other URLs are skipped.
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.IMG) {
            return;
        }

        Object src = a.getAttribute(HTML.Attribute.SRC);
        if (!(src instanceof String)) {
            return;
        }

        String url = (String) src;
        if (ABSOLUTE_HTTP_URL.matcher(url).matches()) {
            sb.append(url).append("\n");
        }
    }

    /**
     * Parses an HTML document and returns the collected image URLs.
     *
     * @param file reader positioned at the start of the HTML document
     * @return the URLs, each terminated by {@code "\n"}; an empty string when
     *         none were found; {@code null} when {@code file} is {@code null}
     * @throws Exception if reading or parsing the document fails
     */
    public String parse(BufferedReader file) throws Exception {
        if (file == null) {
            return null;
        }

        // Reset per-document state so the instance can be reused without
        // leaking links or flags from a previous document.
        sb.setLength(0);
        start = false;
        finished = false;

        new ParserDelegator().parse(file, this, true);
        return sb.toString();
    }
}
90
//AnalizeWebParse.java2

3
//网页分析代码,需要用户根据自己需要做适当修改4

5
import java.io.BufferedReader;6
import java.io.StringReader;7
import java.util.regex.Pattern;8

9
import javax.swing.text.MutableAttributeSet;10
import javax.swing.text.html.HTML;11
import javax.swing.text.html.HTMLEditorKit.ParserCallback;12
import javax.swing.text.html.parser.ParserDelegator;13

14
/**
 * Page-analysis callback: collects the {@code src} of every {@code <img>} tag
 * whose value is an absolute {@code http://} or {@code https://} URL, one URL
 * per line.
 *
 * <p>The class also tracks whether the parser has entered, and then left, a
 * {@code <div>} whose {@code class} attribute contains {@code "body"} (the
 * {@link #start} and {@link #finished} flags). Those flags are maintained but
 * are not consulted when collecting images; adapt {@link #handleSimpleTag} if
 * the extraction should be limited to that region.
 */
public class AnalizeWebParse extends ParserCallback {

    /** Compiled once: accepts absolute http and https URLs. */
    private static final Pattern ABSOLUTE_HTTP_URL = Pattern.compile("^https?://.+");

    /** Accumulates the collected URLs, each followed by a newline. */
    StringBuffer sb = new StringBuffer();

    /** Set once a {@code <div>} with a class containing "body" has been opened. */
    boolean start = false;

    /** Set by the first {@code </div>} seen after {@link #start} became true. */
    boolean finished = false;

    /**
     * Prints a line to standard output.
     *
     * @param s the text to print
     */
    public void p(String s) {
        System.out.println(s);
    }

    /**
     * Marks the start of the tracked region when a {@code <div>} whose class
     * contains "body" is opened. Ignored once the region has started or ended.
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs, int pos) {
        if (finished || start || tag != HTML.Tag.DIV) {
            return;
        }

        Object cla = attribs.getAttribute(HTML.Attribute.CLASS);
        // instanceof also rejects null, i.e. a div without a class attribute.
        if (cla instanceof String && ((String) cla).indexOf("body") != -1) {
            start = true;
        }
    }

    /**
     * Marks the tracked region as finished on the first closing {@code </div>}
     * after it started. Nested divs are not counted.
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (tag == HTML.Tag.DIV && start && !finished) {
            finished = true;
        }
    }

    /** Text content is not needed for link extraction and is ignored. */
    @Override
    public void handleText(char[] text, int pos) {
    }

    /**
     * Records the {@code src} of an {@code <img>} tag when it is an absolute
     * http or https URL; relative and other URLs are skipped.
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.IMG) {
            return;
        }

        Object src = a.getAttribute(HTML.Attribute.SRC);
        if (!(src instanceof String)) {
            return;
        }

        String url = (String) src;
        if (ABSOLUTE_HTTP_URL.matcher(url).matches()) {
            sb.append(url).append("\n");
        }
    }

    /**
     * Parses an HTML document and returns the collected image URLs.
     *
     * @param file reader positioned at the start of the HTML document
     * @return the URLs, each terminated by {@code "\n"}; an empty string when
     *         none were found; {@code null} when {@code file} is {@code null}
     * @throws Exception if reading or parsing the document fails
     */
    public String parse(BufferedReader file) throws Exception {
        if (file == null) {
            return null;
        }

        // Reset per-document state so the instance can be reused without
        // leaking links or flags from a previous document.
        sb.setLength(0);
        start = false;
        finished = false;

        new ParserDelegator().parse(file, this, true);
        return sb.toString();
    }
}




浙公网安备 33010602011771号