网络内容爬取、文件操作、KMP匹配
1 import java.net.MalformedURLException;
2 import java.net.URL;
3 import java.net.URLConnection;
4 import java.util.ArrayList;
5 import java.io.*;
6 import java.util.Scanner;
7
/**
 * Downloads the body of a web page as a single string.
 *
 * <p>Line terminators of the response are dropped: the lines returned by
 * {@link BufferedReader#readLine()} are concatenated directly.
 */
class GetHtmlData {
    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11";

    /** Address used by the most recent fetch (after optional completion by {@link #bq}). */
    private String url;

    /** {@link URL} object built from {@link #url} by the most recent fetch. */
    URL readurl;

    GetHtmlData() {
    }

    /**
     * Completes an address that lacks a scheme by prepending {@code "https://"}.
     *
     * <p>An address is considered complete only when it starts with
     * {@code "http://"} or {@code "https://"} (compared case-insensitively).
     * Anything else, including the empty string and hosts that merely begin
     * with some of the letters of "http" (for example {@code "att.com"}),
     * is prefixed.
     *
     * @param pt the address typed by the user
     * @return {@code pt} unchanged when it already has an http(s) scheme,
     *         otherwise {@code "https://" + pt}
     * @throws NullPointerException if {@code pt} is {@code null}
     */
    String bq(String pt) {
        if (hasHttpScheme(pt)) {
            return pt;
        }
        return "https://" + pt;
    }

    /** Returns whether {@code s} starts with "http://" or "https://", ignoring case. */
    private static boolean hasHttpScheme(String s) {
        return s.regionMatches(true, 0, "http://", 0, 7)
                || s.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * Fetches a page and decodes it as GBK.
     *
     * @param pt    the address to fetch
     * @param check when {@code true}, {@code pt} is first passed through {@link #bq}
     * @return the response body with line terminators removed
     * @throws IOException if the address is malformed, the connection fails,
     *                     or the body cannot be read
     */
    String GetUrl(String pt, boolean check) throws IOException {
        return fetch(pt, check, "GBK");
    }

    /**
     * Fetches a page and decodes it as UTF-8.
     *
     * @param pt    the address to fetch
     * @param check when {@code true}, {@code pt} is first passed through {@link #bq}
     * @return the response body with line terminators removed
     * @throws IOException if the address is malformed, the connection fails,
     *                     or the body cannot be read
     */
    String GetUrl_utf(String pt, boolean check) throws IOException {
        return fetch(pt, check, "UTF-8");
    }

    /** Shared implementation of the two public fetch methods; only the charset differs. */
    private String fetch(String pt, boolean check, String charsetName) throws IOException {
        if (check) {
            pt = bq(pt);
        }
        url = pt;
        readurl = new URL(url);
        URLConnection connection = readurl.openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.connect();

        StringBuilder webData = new StringBuilder();
        InputStream raw = connection.getInputStream();
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(raw, charsetName));
            String line;
            while ((line = in.readLine()) != null) {
                webData.append(line);
            }
        } finally {
            // Closing the underlying stream releases the connection even when
            // the reader could not be created or reading failed part-way.
            raw.close();
        }
        return webData.toString();
    }
}
/**
 * Small helper that opens a text file inside a directory and writes to it.
 *
 * <p>Typical use: {@link #OpenFile}, any number of {@link #print}/{@link #println}
 * calls, then {@link #CloseFile}. All methods report failure through their
 * boolean return value instead of throwing. Output goes through
 * {@link FileWriter#FileWriter(File)}, i.e. it uses the platform default charset.
 */
class WFILE {
    /** Writer on the currently open file, or {@code null} when no file is open. */
    FileWriter fp;

    /** Print wrapper around {@link #fp}, or {@code null} when no file is open. */
    PrintWriter outfp;

    /**
     * Opens (creating or truncating) file {@code f} inside directory {@code m}.
     *
     * <p>The directory, including missing parent directories, is created when
     * it does not exist. A file that is still open from an earlier call is
     * closed first so its handle is not leaked.
     *
     * @param m directory that should contain the file
     * @param f file name inside {@code m}
     * @return {@code true} when the file is open for writing; {@code false}
     *         when an argument is {@code null} or the directory or file could
     *         not be created or opened
     */
    boolean OpenFile(String m, String f) {
        if (m == null || f == null) {
            return false;
        }
        if (fp != null) {
            CloseFile();
        }

        File catalogue = new File(m);
        try {
            if (!catalogue.exists() && !catalogue.mkdirs()) {
                return false;
            }
        } catch (Exception e) {
            System.out.println("在构建目录时异常,目录名为: " + m);
            e.printStackTrace();
            return false;
        }

        // Resolve the file against the directory so the platform's own
        // separator is used instead of a hard-coded backslash.
        File fe = new File(catalogue, f);
        try {
            if (!fe.exists() && !fe.createNewFile()) {
                return false;
            }
            fp = new FileWriter(fe);
            outfp = new PrintWriter(fp);
            return true;
        } catch (Exception e) {
            System.out.println("在构建文件时异常");
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Flushes and closes the currently open file.
     *
     * <p>The writers are released in every case, so later {@link #print} and
     * {@link #println} calls return {@code false} until a file is opened again.
     *
     * @return {@code true} when a file was open and was closed without error;
     *         {@code false} when no file was open or closing failed
     */
    boolean CloseFile() {
        if (fp == null) {
            return false;
        }
        boolean closed = true;
        try {
            if (outfp != null) {
                outfp.flush();
            }
            fp.close();
        } catch (IOException e) {
            System.out.println("关闭文件错误");
            e.printStackTrace();
            closed = false;
        } finally {
            fp = null;
            outfp = null;
        }
        return closed;
    }

    /**
     * Writes the string form of {@code tdata} to the open file.
     *
     * @param tdata value to write
     * @return {@code false} when no file is open, {@code true} otherwise
     */
    <T> boolean print(T tdata) {
        if (fp == null || outfp == null) {
            return false;
        }
        outfp.print(tdata);
        return true;
    }

    /**
     * Writes the string form of {@code tdata} followed by a line separator.
     *
     * @param tdata value to write
     * @return {@code false} when no file is open, {@code true} otherwise
     */
    <T> boolean println(T tdata) {
        if (fp == null || outfp == null) {
            return false;
        }
        outfp.println(tdata);
        return true;
    }
}
126
/**
 * Knuth-Morris-Pratt substring search plus extraction of the text that
 * follows each match.
 *
 * <p>Usage: {@link #Set_substring} to choose the pattern,
 * {@link #find_substring} to scan a text, then {@link #get_data} to collect,
 * for every match, the characters after it up to a delimiter. Optional
 * filters ({@link #Set_min}, {@link #set_last}, {@link #Set_begin}) apply to
 * the next {@link #get_data} call only.
 */
class KMP {
    /** Failure table: {@code nextp[k]} is the longest proper border of the first {@code k} pattern chars. */
    int[] nextp;

    /** Length of the pattern ({@link #n}) and of the scanned text ({@link #m}). */
    int n, m;

    String pattern;
    String text;

    /** Minimum index filter: matches whose extraction would start at or before this index are skipped. */
    int cc;

    /** Index just past the first recorded match, refreshed by {@link #get_data}. */
    int pp;

    /** Optional anchor character for {@link #get_data}; {@code '\0'} means unset. */
    char begin;

    /** Optional upper bound on the match end index for {@link #get_data}; {@code -1} means unset. */
    int last;

    /** Index of the LAST character of every match found by {@link #find_substring}. */
    ArrayList<Integer> ans;

    KMP() {
        cc = 0;
        pp = 0;
        last = -1;
        begin = '\0';
    }

    /**
     * Selects the pattern and builds its failure table.
     *
     * @param pattern the string to search for
     * @return {@code false} when {@code pattern} is {@code null} (the
     *         previous pattern is discarded), {@code true} otherwise
     */
    boolean Set_substring(String pattern) {
        this.pattern = pattern;
        if (pattern == null) {
            // Drop the state of any previous pattern so it cannot be used by mistake.
            n = 0;
            nextp = new int[1];
            return false;
        }
        n = pattern.length();
        nextp = new int[n + 1];
        for (int i = 1; i < n; ++i) {
            // Walk the border chain of the prefix of length i until one can be extended.
            int border = nextp[i];
            while (border > 0 && pattern.charAt(border) != pattern.charAt(i)) {
                border = nextp[border];
            }
            if (pattern.charAt(border) == pattern.charAt(i)) {
                border++;
            }
            nextp[i + 1] = border;
        }
        return true;
    }

    /**
     * Scans {@code text} and records in {@link #ans} the index of the last
     * character of every (possibly overlapping) occurrence of the pattern.
     * An empty pattern records every index of the text.
     *
     * @param text the text to scan
     * @throws IllegalStateException if no pattern has been set
     * @throws NullPointerException  if {@code text} is {@code null}
     */
    void find_substring(String text) {
        if (pattern == null) {
            throw new IllegalStateException("Set_substring must be called with a non-null pattern first");
        }
        if (text == null) {
            throw new NullPointerException("text must not be null");
        }
        ans = new ArrayList<Integer>();
        this.text = text;
        m = text.length();

        int matched = 0; // number of pattern characters currently matched
        for (int i = 0; i < m; ++i) {
            char c = text.charAt(i);
            // After a full match, or on a mismatch, fall back along the failure table.
            while (matched > 0 && (matched == n || pattern.charAt(matched) != c)) {
                matched = nextp[matched];
            }
            if (matched < n && pattern.charAt(matched) == c) {
                matched++;
            }
            if (matched >= n) {
                ans.add(i);
            }
        }
    }

    /**
     * Sets the minimum index for the next {@link #get_data} call.
     *
     * @param t matches whose extracted text would start at an index
     *          {@code <= t} are skipped
     */
    void Set_min(int t) {
        cc = t;
    }

    /**
     * Sets the upper bound for the next {@link #get_data} call.
     *
     * @param t extraction stops at the first match whose end index is greater than {@code t}
     */
    void set_last(int t) {
        last = t;
    }

    /**
     * Sets the anchor character for the next {@link #get_data} call.
     *
     * @param t extraction starts after the first occurrence of {@code t}
     *          found at or after the last character of the match
     */
    void Set_begin(char t) {
        begin = t;
    }

    /**
     * Collects, for each recorded match, the text that follows it up to (but
     * not including) the next occurrence of {@code s}, or up to the end of the
     * text when {@code s} does not occur. Empty extractions are skipped.
     *
     * <p>The filters set by {@link #Set_min}, {@link #set_last} and
     * {@link #Set_begin} are applied and then reset.
     *
     * @param s delimiter that ends each extracted string
     * @return the extracted strings in match order
     * @throws IllegalStateException if {@link #find_substring} has not been called
     */
    ArrayList<String> get_data(char s) {
        if (ans == null || text == null) {
            throw new IllegalStateException("find_substring must be called before get_data");
        }
        ArrayList<String> zans = new ArrayList<String>();
        int textLength = text.length();
        for (int i = 0; i < ans.size(); i++) {
            int w = ans.get(i);
            if (i == 0) {
                pp = w + 1;
            }
            if (last != -1 && w > last) {
                break;
            }
            if (begin != '\0') {
                int anchor = text.indexOf(begin, w);
                if (anchor < 0) {
                    continue;
                }
                w = anchor;
            }
            int start = w + 1;
            if (start <= cc) {
                continue;
            }
            int end = text.indexOf(s, start);
            if (end < 0) {
                end = textLength;
            }
            if (end > start) {
                zans.add(text.substring(start, end));
            }
        }
        cc = 0;
        begin = '\0';
        last = -1;
        return zans;
    }
}