可能需要在 pom.xml 中添加的 Maven 依赖:
<!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-logging/commons-logging -->
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.1.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-httpclient/commons-httpclient -->
<dependency>
    <groupId>commons-httpclient</groupId>
    <artifactId>commons-httpclient</artifactId>
    <version>3.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.3</version>
</dependency>
下面贴出实现类的主要代码(仅核心部分):
@Override
    public boolean inserturlNews(String urls) {
        // TODO: 2021/5/17 只支持新民网数据爬取,可根据页面标签定时解析
        String url = urls;
        Document doc = null;
        try {
            doc = Jsoup.connect(url).get();
            Elements listDiv = doc.getElementsByAttributeValue("class", "type_content_list type-item");
            NewsInformation newsInformation= new NewsInformation();
            for (Element element : listDiv) {
                Elements texts = element.getElementsByTag("a");
                for (Element text : texts) {
                    String newsUrl=text.attr("href");
                    String ptext = text.attr("title");
                    if (! ptext.isEmpty() && newsUrl.contains(".html")){
                        newsInformation.setTitle(ptext);
                        newsInformation.setNewsUrl(newsUrl);
                        try {
                            Document newsDoc = Jsoup.connect(newsUrl).get();
                            newsInformation.setForm(newsDoc.select(".info").select("span").get(0).text());
                            //环球,时政
                            if (url.contains("http://newsxmwb.xinmin.cn/world/") || url.contains("http://newsxmwb.xinmin.cn/shizheng/")) {
                                newsInformation.setAuthor(newsDoc.select(".info").select("span").get(1).text());
                                newsInformation.setDataTime(newsDoc.select(".info").select("span").get(3).text());
                                if (!newsInformation.getDataTime().contains("2021-")){
                                    newsInformation.setDataTime(now.format(fmTime));
                                }
                                //文、体会
                            }else if (url.contains("http://newsxmwb.xinmin.cn/wentihui/")){
                                newsInformation.setAuthor(newsDoc.select(".info").select("span").get(1).text());
                                newsInformation.setDataTime(newsDoc.select(".info").select("span").get(4).text());
                                if (!newsInformation.getDataTime().contains("2021-")){
                                    newsInformation.setDataTime(newsDoc.select(".info").select("span").get(3).text());
                                }
                                //头条
                            } else if (url.contains("http://shanghai.xinmin.cn/t/gdbd/")){
                                newsInformation.setAuthor(newsDoc.select(".info").select("span").get(1).text());
                                newsInformation.setDataTime(newsDoc.select(".info").select("span").get(3).text());
                                if (!newsInformation.getDataTime().contains("2021-")){
                                    newsInformation.setDataTime(now.format(fmTime));
                                }
                            }
                            Elements listNewsDetail = newsDoc.getElementsByAttributeValue("class", "a_content");
                            for (Element listNews : listNewsDetail) {
                                Elements contents = listNews.getElementsByTag("p");
                                Elements images = listNews.getElementsByTag("img");
                                newsInformation.setImage(images.attr("src"));
                                StringBuffer buffer =new StringBuffer();
                                for (Element newsContent : contents) {
                                    buffer.append