顶会热词统计

一、要求:

1、完成论文的题目、摘要、关键词、原文链接四项内容爬取;

2、存储到本地数据库中;

3、按照题目、关键词分类统计得到最热的十个领域方向;

4、热词出现的次数越多,在热词云中显示得就越大;还要将热词与文章关联起来,点击热词云中的热词可以找到与之对应的文章题目;

二、效果

 

 

三、设计思路:

1、爬取、存取数据:

2、使用 ECharts 的 wordCloud 扩展(echarts-wordcloud)实现热词云。

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Hot-word cloud page.

  Renders an ECharts wordCloud in #main from the JSON array of {name, value}
  objects returned by POST PaperServlet_, and below it a table of paper links
  taken from the request attribute "list" (items expose "lianjie" and "title").
  Clicking a word navigates to ClickServlet with the word in the "geunjian"
  query parameter.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
<script src="js/jquery-1.11.3.min.js" type="text/javascript"></script>
<script type="text/javascript" src="js/echarts.min.js"></script>
<script type="text/javascript" src="js/china.js"></script>
<script src="js/bootstrap.min.js" type="text/javascript"></script>
<%-- NOTE(review): this loads a second ECharts build after js/echarts.min.js above;
     confirm which of the two is actually intended and drop the other. --%>
<script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
<script src='js/echarts-wordcloud.js'></script>
</head>
<body>
<div id="main" style="width: 100%;height: 400px"></div>
<div>
  <table class="table" style="width: 100%;align-content: center;" >
    <tr>
      <th align="center">论文连接</th>
    </tr>
    <c:forEach var="item" items="${list}">
      <tr>
        <%-- c:out escapes the scraped values so they cannot inject markup. --%>
        <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
      </tr>
    </c:forEach>
  </table>
</div>
<script>
  var chart = echarts.init(document.getElementById('main'));

  // Builds the wordCloud option for an array of {name, value} entries.
  function buildOption(words) {
    return {
      tooltip: {},
      series: [ {
        type: 'wordCloud',
        gridSize: 2,
        sizeRange: [20, 50],
        rotationRange: [-90, 90],
        shape: 'pentagon',
        width: 600,
        height: 300,
        drawOutOfBound: true,
        textStyle: {
          normal: {
            // Random dark-ish colour per word (each channel in 0..160).
            color: function () {
              return 'rgb(' + [
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160)
              ].join(',') + ')';
            }
          },
          emphasis: {
            shadowBlur: 10,
            shadowColor: '#333'
          }
        },
        data: words
      } ]
    };
  }

  // Load the word counts asynchronously and draw the cloud once they arrive.
  // The chart is only configured on success, so a failed request can no
  // longer crash the script on an undefined response.
  $.ajax({
    url : "PaperServlet_",
    type : "POST",
    dataType : "json",
    success : function (data) {
      var words = [];
      var rows = data || [];
      for (var i = 0; i < rows.length; i++) {
        // Skip entries without a name; they cannot be drawn or clicked.
        if (rows[i] && rows[i].name) {
          words.push({ name : rows[i].name, value : rows[i].value });
        }
      }
      chart.setOption(buildOption(words));
    },
    error : function () {
      alert("请求失败");
    }
  });

  // Clicking a word opens the list of papers for that word; the word is
  // URL-encoded so characters such as '&', '#' or '+' survive the round trip.
  chart.on('click', function (params) {
    window.location.href = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
  });

  // Call resize through the chart so the method keeps its instance as `this`.
  window.onresize = function () {
    chart.resize();
  };
</script>
</body>
</html>
View Code

 

3、将关键字分割成单词然后对单词进行去重、计数和排序,装到list,转换为json字符串传递给界面(ajax请求获取的数据)

package com.me.servlet;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.google.gson.Gson;
import com.me.dao.LWDao;
import com.me.domain.LunWen;
import com.me.domain.Tu;

@WebServlet("/PaperServlet_")
public class PaperServlet_ extends HttpServlet {
    private static final long serialVersionUID = 1L;
    public PaperServlet_() {
        super();}

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setHeader("content-type", "text/html;charset=UTF-8");
        response.setCharacterEncoding("UTF-8");
        LWDao dao = new LWDao();
        List<LunWen> list = new ArrayList<LunWen>();
        List<Tu> list_tu = new ArrayList<Tu>();
        String [] str = new String[10000];
        String [] str_ = new String[10000];
        int [] b = new int[10000];
        int num = 0;
        int length1 = 0;
        try {
            list = dao.search_();} 
        catch (SQLException e) {
            e.printStackTrace();}
        for(int i=0;i<list.size();i++) {
            if(list.get(i).getLianjie()!=null) {
                String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());
                list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);}
            String[] split = list.get(i).getGuanjian().split(" ");
            for(int j=0;j<split.length;j++) {
                str[num++] = split[j];}}
        for(int k=0;k<num;k++) {
            b[k]=0;}
        str_[0]=str[0];
        int tt=1;
        Boolean rt=true;
        for(int i=1;i<num;i++) {
            rt=false;
            for(int j=0;j<tt;j++) {
                if(str[i].equals(str_[j])) {
                    rt=true;
                    break;}}
            if(!rt) {
                str_[tt]=str[i];
                tt++;}}
        length1=tt;
        for(int i=0;i<length1;i++) {
            for(int j=0;j<num;j++) {
                if(str_[i].equals(str[j])) {
                    b[i]++;}}}
        int t3=0;
        int t2=0;
        String sr="";
        for(int i=0;i<length1-1;i++) {
            t3=i;
            for(int j=i+1;j<length1;j++) {
                if(b[t3]<b[j]) {
                    t3=j;}}
           if(t3!=i) {
               t2=b[i];
               b[i]=b[t3];
               b[t3]=t2;
               sr=str_[i];
               str_[i]=str_[t3];
               str_[t3]=sr;}}
        for(int i=0;i<100;i++) {
            Tu tu = new Tu();
            tu.name=str_[i];
            tu.value= b[i];
            list_tu.add(tu);}
        
        Gson gson = new Gson();
        String json = gson.toJson(list_tu);
        response.getWriter().write(json);}
    

    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);}}
View Code

 

1)热词实体

2)将关键字分割成单词然后对单词进行去重、计数和排序,装到list

4、论文链接列表数据准备(PaperServlet是最初访问的地方,携带数据跳转到jsp界面)

1)论文实体

5、点击热词后携带此热词到servlet,再从数据库中找出论文的关键字中包含此热词的论文列表

6、dao层

posted @ 2020-04-17 14:41  S&JH  阅读(129)  评论(0)    收藏  举报