数据分析中使用 ip2region 将 IP 地址转换为城市

 官网地址:https://github.com/lionsoul2014/ip2region/blob/master/ReadMe.md

介绍:具体看github上的readme文档

image

使用说明:

在 GitHub 上下载 ip2region_v4.xdb,并放在项目的 src/main/resources 目录下。下载地址:https://github.com/lionsoul2014/ip2region/blob/master/data/ip2region_v4.xdb

引入依赖:

 <dependency>
       <groupId>org.lionsoul</groupId>
       <artifactId>ip2region</artifactId>
       <version>2.6.5</version>
 </dependency>

测试数据:test.txt

106.39.41.166,10/Nov/2016:00:01:02 +0800,10,54 ,video,8701
113.140.11.123,10/Nov/2016:00:01:02 +0800,10,54 ,video,5915
125.122.216.102,10/Nov/2016:00:01:02 +0800,10,54 ,video,9819
116.231.219.242,10/Nov/2016:00:01:02 +0800,10,54 ,video,6335
61.136.143.62,10/Nov/2016:00:01:02 +0800,10,54 ,video,7629
39.186.247.142,10/Nov/2016:00:01:02 +0800,10,2957 ,video,3237

具体代码:

IpRegionProcessor
import org.lionsoul.ip2region.xdb.Searcher;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line tool that rewrites a comma-separated log file so that the
 * leading IP address of every record becomes {@code city(ip)}.
 *
 * <p>The city is looked up with an ip2region {@link Searcher} whose xdb file is
 * loaded fully into memory. Input and output files are always read and written
 * as UTF-8 so the (Chinese) city names do not depend on the platform charset.
 */
public class IpRegionProcessor {
    /** Label written when no usable city, province or country is available. */
    private static final String UNKNOWN_CITY = "未知城市";

    /**
     * Placeholder for a missing region field.
     * NOTE(review): the ip2region README describes "0" as the filler for
     * unknown fields - confirm against the xdb file actually in use.
     */
    private static final String MISSING_FIELD = "0";

    /**
     * Entry point.
     *
     * @param args optional overrides, in order: xdb path, input file path,
     *             output file path; any argument left out falls back to the
     *             built-in default below
     */
    public static void main(String[] args) {
        // xdb database file path
        String dbPath = args.length > 0 ? args[0] : "src/main/resources/ip2region_v4.xdb";
        // input file path
        String inputFilePath = args.length > 1 ? args[1] : "d:\\IdeaProjects\\da12.15\\test.txt";
        // output file path
        String outputFilePath = args.length > 2 ? args[2] : "d:\\IdeaProjects\\da12.15\\output.txt";

        Searcher searcher = null;
        try {
            // 1. Load the whole xdb file into memory
            byte[] cBuff = Searcher.loadContentFromFile(dbPath);
            // 2. Create a searcher backed by that in-memory buffer
            searcher = Searcher.newWithBuffer(cBuff);

            // 3. Read the input file
            List<String> lines = readFile(inputFilePath);
            // 4. Convert every line
            List<String> processedLines = processLines(lines, searcher);
            // 5. Write the result
            writeFile(outputFilePath, processedLines);

            System.out.println("文件处理完成,结果已保存到: " + outputFilePath);
        } catch (Exception e) {
            System.err.println("处理文件时发生错误: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // 6. Release the searcher
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    System.err.println("关闭Searcher时发生错误: " + e.getMessage());
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads all lines of a UTF-8 text file.
     *
     * @param filePath path of the file to read
     * @return the lines in file order, without line terminators; a leading
     *         byte-order mark on the first line is removed
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readFile(String filePath) throws IOException {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {
            String line;
            boolean firstLine = true;
            while ((line = reader.readLine()) != null) {
                // A UTF-8 BOM would otherwise become part of the first IP address.
                if (firstLine && line.startsWith("\uFEFF")) {
                    line = line.substring(1);
                }
                firstLine = false;
                lines.add(line);
            }
        }
        return lines;
    }

    /**
     * Writes the given lines to a file as UTF-8, one per line, replacing any
     * existing content.
     *
     * @param filePath path of the file to write
     * @param lines    lines to write, in order
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeFile(String filePath, List<String> lines) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(filePath), StandardCharsets.UTF_8))) {
            for (String line : lines) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Rewrites every line so that the text before the first comma (the IP
     * address) becomes {@code city(ip)}; the rest of the line is kept as is.
     *
     * <p>Empty lines and lines without a comma are passed through unchanged.
     * If the lookup for a line fails, the failure is reported on stderr and
     * the city falls back to {@code 未知城市}, so one bad record does not
     * abort the whole file.
     *
     * @param lines    input lines
     * @param searcher ip2region searcher used for the lookups
     * @return the converted lines, in input order
     * @throws Exception kept for signature compatibility; lookup failures are
     *                   handled per line
     */
    private static List<String> processLines(List<String> lines, Searcher searcher) throws Exception {
        List<String> processedLines = new ArrayList<>(lines.size());
        for (String line : lines) {
            if (line.isEmpty()) {
                processedLines.add(line);
                continue;
            }

            // The IP address is everything before the first comma
            int commaIndex = line.indexOf(',');
            if (commaIndex == -1) {
                processedLines.add(line);
                continue;
            }

            String ip = line.substring(0, commaIndex).trim();
            String restOfLine = line.substring(commaIndex);

            // Look up the region; Searcher.search declares a plain Exception
            String city;
            try {
                city = extractCity(searcher.search(ip));
            } catch (Exception e) {
                System.err.println("查询IP地址失败: " + ip + " (" + e.getMessage() + ")");
                city = UNKNOWN_CITY;
            }

            // New line: city(ip) + remaining content (starts with the comma)
            processedLines.add(city + "(" + ip + ")" + restOfLine);
        }
        return processedLines;
    }

    /**
     * Extracts the most specific place name from a region string.
     *
     * <p>The region is split on {@code |} and is expected to look like
     * {@code country|province|city|ISP}. The city (third field) is preferred,
     * then the province, then the country. Fields that are blank or equal to
     * the {@code "0"} placeholder are skipped.
     *
     * @param region region string returned by the searcher, may be {@code null}
     * @return the chosen place name, or {@code 未知城市} when the region is
     *         {@code null}, empty, or contains no usable field
     */
    private static String extractCity(String region) {
        if (region == null || region.isEmpty()) {
            return UNKNOWN_CITY;
        }

        // split() drops trailing empty strings, so "|||" yields an empty array
        String[] parts = region.split("\\|");
        for (int i = Math.min(parts.length, 3) - 1; i >= 0; i--) {
            String candidate = parts[i].trim();
            if (!candidate.isEmpty() && !MISSING_FIELD.equals(candidate)) {
                return candidate;
            }
        }
        return UNKNOWN_CITY;
    }
}

运行结果:output.txt

北京市(106.39.41.166),10/Nov/2016:00:01:02 +0800,10,54 ,video,8701
西安市(113.140.11.123),10/Nov/2016:00:01:02 +0800,10,54 ,video,5915
杭州市(125.122.216.102),10/Nov/2016:00:01:02 +0800,10,54 ,video,9819
上海市(116.231.219.242),10/Nov/2016:00:01:02 +0800,10,54 ,video,6335
宜昌市(61.136.143.62),10/Nov/2016:00:01:02 +0800,10,54 ,video,7629
杭州市(39.186.247.142),10/Nov/2016:00:01:02 +0800,10,2957 ,video,3237

 

posted @ 2025-12-15 20:34  雨花阁  阅读(8)  评论(0)    收藏  举报