所需 jar 包:jsoup(原文此处的图片已缺失)
package Jsouop;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.print.DocFlavor;
import javax.print.attribute.standard.PDLOverrideSupported;
import java.io.*;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Jsoup demo that reads one chapter page of a web novel, prints the Chinese
 * text of the chapter to standard output and remembers where the next chapter is.
 *
 * <p>The program keeps a bookmark in a text file: the file holds the part of the
 * URL that is appended to {@link #BASE_URL} to address the chapter to read. After
 * the chapter has been printed, the {@code href} of the link whose text contains
 * "下一章" ("next chapter") is written back to the same file, so the next run
 * continues with the following chapter.
 */
public class Jsoupdemo2 {

    /** URL prefix to which the bookmarked chapter address is appended. */
    private static final String BASE_URL = "http://www.uidzhx.com/du/27/27614/";

    /** Text file holding the address of the chapter to read on the next run. */
    private static final String BOOKMARK_FILE = "d:\\yd.txt";

    /** Timeout, in milliseconds, passed to Jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 100000;

    /** Number of characters printed per output line. */
    private static final int LINE_WIDTH = 50;

    /** Link text that identifies the "next chapter" anchor on the page. */
    private static final String NEXT_CHAPTER_LABEL = "下一章";

    /**
     * Matches a run of characters in the range U+4E00..U+9FA5 or one of the two
     * punctuation characters listed in the character class. Compiled once
     * because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern CHINESE_RUN = Pattern.compile("([\u4e00-\u9fa5,。]+)");

    /**
     * Reads the bookmarked chapter, prints it, and stores the next chapter's link.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or the bookmark cannot be written
     */
    public static void main(String[] args) throws IOException {
        String chapter = readFileContent(BOOKMARK_FILE);
        System.out.println(chapter);

        // Fetch and parse the page once; both the chapter text and the
        // navigation links are taken from this single document.
        Document page = Jsoup.parse(new URL(BASE_URL + chapter), TIMEOUT_MILLIS);

        String contentHtml = page.select("#content1").toString();
        printWrapped(getChinese(contentHtml), LINE_WIDTH);

        String nextHref = findNextChapterHref(page);
        System.out.println(nextHref);

        if (nextHref == null || nextHref.isEmpty()) {
            // Leave the existing bookmark untouched: opening the file for
            // writing would truncate it and lose the current position.
            System.err.println("No next-chapter link found; bookmark left unchanged.");
            return;
        }
        saveBookmark(nextHref);
    }

    /**
     * Extracts the Chinese text from a string by concatenating every run of
     * characters matched by {@link #CHINESE_RUN}; everything else (markup,
     * Latin letters, digits, whitespace) is dropped.
     *
     * @param paramValue text to filter, for example the HTML of an element
     * @return the matched runs joined in order of appearance; empty if nothing matches
     */
    public static String getChinese(String paramValue) {
        StringBuilder chinese = new StringBuilder();
        Matcher matcher = CHINESE_RUN.matcher(paramValue);
        while (matcher.find()) {
            chinese.append(matcher.group(0));
        }
        return chinese.toString();
    }

    /**
     * Reads a text file and returns its lines concatenated without line
     * terminators.
     *
     * <p>This is a best-effort read: if the file cannot be opened or read, the
     * stack trace is printed and whatever was read so far (possibly the empty
     * string) is returned.
     *
     * @param fileName path of the file to read
     * @return the file's lines joined together, or the part read before an I/O error
     */
    public static String readFileContent(String fileName) {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader exactly once on every path.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return content.toString();
    }

    /** Prints {@code text} to standard output, {@code width} characters per line. */
    private static void printWrapped(String text, int width) {
        for (int start = 0; start < text.length(); start += width) {
            int end = Math.min(start + width, text.length());
            System.out.println(text.substring(start, end));
        }
    }

    /**
     * Returns the {@code href} of the last anchor whose text contains
     * {@link #NEXT_CHAPTER_LABEL}, or {@code null} if the page has no such anchor.
     */
    private static String findNextChapterHref(Document page) {
        String href = null;
        Elements anchors = page.getElementsByTag("a");
        for (Element anchor : anchors) {
            if (anchor.text().contains(NEXT_CHAPTER_LABEL)) {
                href = anchor.attr("href");
            }
        }
        return href;
    }

    /** Overwrites the bookmark file with {@code href}, closing the file even on failure. */
    private static void saveBookmark(String href) throws IOException {
        try (FileWriter writer = new FileWriter(BOOKMARK_FILE)) {
            writer.write(href);
        }
    }
}