JAVA网络爬虫
HttpClient

导航

 

Spring Data Elasticsearch-代码版

项目结构

在这里插入图片描述

配置文件

  1. pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven build descriptor for the es-demo Spring Boot / Elasticsearch sample project. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.leyou.demo</groupId>
        <artifactId>es-demo</artifactId>
        <version>1.0-SNAPSHOT</version>
        <name>elasticsearch</name>
        <description>Demo project for Spring Boot</description>
    
        <!-- Dependency versions below are inherited from this parent, which is why they declare none. -->
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>2.0.2.RELEASE</version>
            <relativePath/> <!-- lookup parent from repository -->
        </parent>
    
        <properties>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
            <java.version>1.8</java.version>
        </properties>
    
        <dependencies>
            <dependency>
                <!-- Spring Boot starter for Elasticsearch -->
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
            </dependency>
            <dependency>
                <!-- Test support; only on the test classpath -->
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
            <dependency>
                <!-- Lombok annotations used by the entity class -->
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
            </dependency>
        </dependencies>
    
        <build>
            <plugins>
                <!-- Spring Boot Maven plugin -->
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
            </plugins>
        </build>
    
    </project>
    
  2. application.yml

    spring:
      data:
        elasticsearch:
          cluster-name: elasticsearch  # cluster name
          cluster-nodes: 192.168.79.128:9300  # cluster node address (host:port)
    

启动类

  1. EsApplication

    package com.leyou;
    
    import org.springframework.boot.SpringApplication;
    import org.springframework.boot.autoconfigure.SpringBootApplication;
    
    /**
     * @program: es-demo
     * @description: Spring Boot entry point of the Elasticsearch demo application.
     * @author: Mr.Xiao
     * @create: 2020-06-10 09:59
     **/
    @SpringBootApplication
    public class EsApplication {
    
        /**
         * Starts the Spring application.
         *
         * @param args command-line arguments; forwarded to Spring Boot so that
         *             overrides such as
         *             {@code --spring.data.elasticsearch.cluster-nodes=host:9300}
         *             are honoured instead of being silently dropped
         */
        public static void main(String[] args) {
            SpringApplication.run(EsApplication.class, args);
        }
    
    }
    
    

实体类

  1. Item

    package com.leyou.es.pojo;
    
    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;
    import org.springframework.data.annotation.Id;
    import org.springframework.data.elasticsearch.annotations.Document;
    import org.springframework.data.elasticsearch.annotations.Field;
    import org.springframework.data.elasticsearch.annotations.FieldType;
    
    /**
     * Product document stored in Elasticsearch.
     *
     * <p>{@code @Document} attributes used below:
     * <ul>
     *   <li>{@code indexName}: name of the index</li>
     *   <li>{@code type}: mapping type inside the index</li>
     *   <li>{@code shards}: number of shards</li>
     *   <li>{@code replicas}: number of replicas</li>
     * </ul>
     *
     * <p>Fields are private; Lombok's {@code @Data} generates the accessors, and
     * {@code @AllArgsConstructor} takes the fields in declaration order
     * (id, title, category, brand, price, images).
     *
     * @program: es-demo
     * @author: Mr.Xiao
     * @create: 2020-06-10 10:00
     **/
    @AllArgsConstructor
    @NoArgsConstructor
    @Data
    @Document(indexName = "xiaoge2", type = "item", shards = 1, replicas = 1)
    public class Item {
    
        /** Document id; {@code @Field} declares the field type, {@code @Id} marks it as the index id. */
        @Field(type = FieldType.Long)
        @Id
        private Long id;
    
        /**
         * Title. Mapped as text because it is meant to be analyzed (tokenized);
         * {@code analyzer} selects the analyzer to use.
         */
        @Field(type = FieldType.Text, analyzer = "ik_smart")
        private String title;
    
        /**
         * Category. Mapped as keyword because it must not be tokenized.
         * {@code index = true} (the default, could be omitted) keeps the field
         * usable for searching and filtering.
         */
        @Field(type = FieldType.Keyword, index = true)
        private String category;
    
        /** Brand; keyword, searchable and filterable. */
        @Field(type = FieldType.Keyword, index = true)
        private String brand;
    
        /** Price; searchable and filterable. */
        @Field(type = FieldType.Double, index = true)
        private Double price;
    
        /**
         * Image URL. Mapped as keyword because it must not be tokenized;
         * {@code index = false} means it cannot be searched or filtered on.
         */
        @Field(type = FieldType.Keyword, index = false)
        private String images;
    
    }
    
    

接口

  1. ItemRepository

    package com.leyou.es.repository;
    
    import com.leyou.es.pojo.Item;
    import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
    
    import java.util.List;
    
    /**
     * @program: es-demo
     * @description: Repository for Item documents. ElasticsearchRepository's first type
     *               argument is the entity type, the second is the id type.
     * @author: Mr.Xiao
     * @create: 2020-06-10 15:35
     **/
    // Like the "generic mapper" (通用mapper), ElasticsearchRepository already ships the usual CRUD operations.
    public interface ItemRepository extends ElasticsearchRepository<Item, Long> {
    
        /*
            Custom range query. Only the declaration is written here; the
            implementation is supplied automatically.
            Note:
                the method name is not arbitrary - it must be composed of the
                keywords the framework offers (the IDE suggests them while typing).
         */
        List<Item> findByPriceBetween(Double begin, Double end);
    
    }
    
    

测试

  1. EsTest

    package com.leyou.es.demo;
    
    import com.leyou.es.pojo.Item;
    import com.leyou.es.repository.ItemRepository;
    import org.elasticsearch.index.query.MatchQueryBuilder;
    import org.elasticsearch.index.query.QueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.search.aggregations.Aggregation;
    import org.elasticsearch.search.aggregations.AggregationBuilders;
    import org.elasticsearch.search.aggregations.Aggregations;
    import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
    import org.elasticsearch.search.sort.SortBuilders;
    import org.elasticsearch.search.sort.SortOrder;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.boot.test.context.SpringBootTest;
    import org.springframework.data.domain.Page;
    import org.springframework.data.domain.PageRequest;
    import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
    import org.springframework.data.elasticsearch.core.aggregation.AggregatedPage;
    import org.springframework.data.elasticsearch.core.query.FetchSourceFilter;
    import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
    import org.springframework.test.context.TestPropertySource;
    import org.springframework.test.context.junit4.SpringRunner;
    
    import java.io.PrintStream;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * @program: es-demo
     * @description: Demo tests exercising index creation, CRUD, querying, paging,
     *               sorting and aggregation against the Item index. The tests only
     *               print results; they make no assertions.
     * @author: Mr.Xiao
     * @create: 2020-06-10 15:08
     **/
    @RunWith(SpringRunner.class)
    @SpringBootTest
    public class EsTest {
    
        /** Template used for index management and aggregation queries. */
        @Autowired
        private ElasticsearchTemplate template;
    
        /** Repository used for CRUD and search on Item documents. */
        @Autowired
        private ItemRepository repository;
    
        /**
         * Creates the index for {@link Item} and then its mapping.
         */
        @Test
        public void testCreate() {
            // Create the index
            template.createIndex(Item.class);
            // Create the mapping
            template.putMapping(Item.class);
    
            /*
                To delete the index instead:
                template.deleteIndex(Item.class);
             */
    
        }
    
        /**
         * Bulk-indexes five sample items.
         *
         * Repository naming convention: methods starting with find query,
         * methods starting with save insert (or update when the id already
         * exists), methods starting with delete remove.
         */
        @Test
        public void indexList() {
            List<Item> list = new ArrayList<>();
            list.add(new Item(1L, "小米手机7", "手机", "小米", 3299.00, "http://image.leyou.com/13123.jpg"));
            list.add(new Item(2L, "坚果手机R1", "手机", "锤子", 3699.00, "http://image.leyou.com/13123.jpg"));
            list.add(new Item(3L, "华为META10", "手机", "华为", 4499.00, "http://image.leyou.com/13123.jpg"));
            list.add(new Item(4L, "小米Mix2S", "手机", "小米", 4299.00, "http://image.leyou.com/13123.jpg"));
            list.add(new Item(5L, "荣耀V10", "手机", "华为", 2799.00, "http://image.leyou.com/13123.jpg"));
            // saveAll takes a collection and stores all of its elements
            repository.saveAll(list);
        }
    
        /**
         * Fetches every item and prints it.
         */
        @Test
        public void testFind() {
            // Query all documents
            Iterable<Item> items = repository.findAll();
            for (Item item : items) {
                System.out.println(item);
            }
        }
    
        /**
         * Prints the items whose price lies between 2000 and 4000.
         */
        @Test
        public void testFindPriceFilterBetween() {
            // Uses the custom range method declared on the repository
            Iterable<Item> items = repository.findByPriceBetween(2000d, 4000d);
            for (Item item : items) {
                System.out.println(item);
            }
        }
    
        /**
         * Basic query: analyzed (match) query on the title.
         */
        @Test
        public void testQuery(){
            // QueryBuilders comes from the native Elasticsearch client and builds the condition
            MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("title", "小米");
            // Run the match query on title for the search text "小米"
            Iterable<Item> items = repository.search(queryBuilder);
            items.forEach(System.out::println);
        }
    
        /**
         * Custom query built with Spring's NativeSearchQueryBuilder, with source filtering.
         */
        @Test
        public void testNativeQuery(){
            // Query builder provided by Spring
            NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
            // Source filtering: FetchSourceFilter(includes, excludes) corresponds to
            // Elasticsearch's _source includes / excludes
            queryBuilder.withSourceFilter(new FetchSourceFilter(new String[]{"id", "title", "price"}, null));
            // Analyzed (match) query on title
            queryBuilder.withQuery(QueryBuilders.matchQuery("title", "小米"));
            // Execute the search
            Page<Item> items = repository.search(queryBuilder.build());
            // Total number of hits
            System.out.println(items.getTotalElements());
            // Total number of pages
            System.out.println(items.getTotalPages());
    
            items.forEach(System.out::println);
        }
    
        /**
         * Paged query.
         */
        @Test
        public void testNativeQueryPage(){
            // Query builder provided by Spring
            NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
            // Term query (not analyzed): category must equal "手机"
            queryBuilder.withQuery(QueryBuilders.termQuery("category", "手机"));
    
            // Paging parameters. Note: page numbers start at 0 here, not at 1
            int page = 0;
            int size = 3;
            // Apply paging
            queryBuilder.withPageable(PageRequest.of(page, size));
    
            // Execute the search
            Page<Item> items = repository.search(queryBuilder.build());
            // Total number of hits
            System.out.println(items.getTotalElements());
            // Total number of pages
            System.out.println(items.getTotalPages());
            // Page size
            System.out.println(items.getSize());
            // Current page number
            System.out.println(items.getNumber());
    
            // Content of the current page
            items.forEach(System.out::println);
        }
    
        /**
         * Sorted query.
         */
        @Test
        public void testSort(){
            // Query builder provided by Spring
            NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
            // Term query (not analyzed): category must equal "手机"
            queryBuilder.withQuery(QueryBuilders.termQuery("category", "手机"));
    
            // Sort by price, descending
            queryBuilder.withSort(SortBuilders.fieldSort("price").order(SortOrder.DESC));
    
            // Execute the search
            Page<Item> items = repository.search(queryBuilder.build());
            // Total number of hits
            System.out.println(items.getTotalElements());
    
            items.forEach(System.out::println);
        }
    
        /**
         * Combines source filtering, match query, sorting and paging in one search.
         */
        @Test
        public void integration() {
            // Query builder provided by Spring
            NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
    
            // Source filtering
            queryBuilder.withSourceFilter(new FetchSourceFilter(new String[] {"id", "title", "price"}, null));
    
            // Query condition
            queryBuilder.withQuery(QueryBuilders.matchQuery("title", "小米"));
    
            // Sorting
            queryBuilder.withSort(SortBuilders.fieldSort("price").order(SortOrder.DESC));
    
            // Paging
            queryBuilder.withPageable(PageRequest.of(0, 2));
    
            // Execute the search
            Page<Item> list = repository.search(queryBuilder.build());
    
            // Total number of hits
            System.out.println(list.getTotalElements());
    
            // Total number of pages
            System.out.println(list.getTotalPages());
    
            // Current page number
            System.out.println(list.getNumber());
    
            // Page size
            System.out.println(list.getSize());
    
            // Content of the current page, printed as one list
            System.out.println(list.getContent());
    
            // Content of the current page, printed one item per line
            list.forEach(System.out::println);
        }
    
        /**
         * Aggregation query: terms aggregation on brand.
         */
        @Test
        public void testAgg() {
            // Query builder provided by Spring
            NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
    
            // Aggregation name
            String aggName = "popularBrand";
    
            // Terms aggregation on the brand field
            queryBuilder.addAggregation(AggregationBuilders.terms(aggName).field("brand"));
    
            // Run the query through the template to get a page that carries aggregations
            AggregatedPage<Item> items = template.queryForPage(queryBuilder.build(), Item.class);
    
            // Extract the aggregations
            Aggregations aggregations = items.getAggregations();
    
            // Look up the aggregation by name. The concrete type is StringTerms because
            // it is a terms aggregation over brand, which is a String field
            StringTerms terms = aggregations.get(aggName);
    
            // Buckets of the aggregation
            List<StringTerms.Bucket> buckets = terms.getBuckets();
    
            for (StringTerms.Bucket bucket : buckets) {
                System.out.println("key = " + bucket.getKey());
                System.out.println("docCount = " + bucket.getDocCount());
            }
    
        }
    
    }
    
    
posted on 2020-06-11 14:18  gmlgxx  阅读(71)  评论(0)    收藏  举报