// Reads text.txt, extracts the <tbody> fragment, and writes each <tr> to out.txt as one line of tab-separated <td> text.
byte[] bytes = Files.readAllBytes(Paths.get("E:\\pdf\\aaa\\html\\text.txt").normalize());
        // Decode explicitly with the platform default charset, the same charset the writer below uses.
        String text = new String(bytes, Charset.defaultCharset());

        // Everything before the first <tbody> tag is dropped so that <tbody> is the root element of the parsed XML.
        int tbodyStart = text.indexOf("<tbody>");
        if (tbodyStart < 0) {
            // Fail with a clear message instead of the StringIndexOutOfBoundsException substring(-1) would raise.
            throw new IllegalStateException("No <tbody> tag found in text.txt");
        }
        String xml = text.substring(tbodyStart);
        InputSource inputXML = new InputSource(new StringReader(xml));

        XPath xPath = XPathFactory.newInstance().newXPath();
        // One node per table row.
        NodeList nodes = (NodeList) xPath.evaluate("/tbody/tr", inputXML, XPathConstants.NODESET);
        int length = nodes.getLength();
        Path file = Paths.get("E:\\pdf\\aaa\\html\\out.txt");
        // TRUNCATE_EXISTING is required: with CREATE alone an existing, longer out.txt would keep
        // stale bytes from a previous run after the newly written content.
        try (BufferedWriter writer = Files.newBufferedWriter(file, Charset.defaultCharset(),
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
            for (int i = 0; i < length; i++) {
                Node node = nodes.item(i);

                // Cells of the current row, evaluated relative to the <tr> node.
                NodeList childList = (NodeList) xPath.evaluate("td", node, XPathConstants.NODESET);
                int cellCount = childList.getLength();
                for (int j = 0; j < cellCount; j++) {
                    // Separate cells with a tab; no tab before the first cell or after the last.
                    if (j > 0) {
                        writer.write("\t");
                    }
                    writer.write(childList.item(j).getTextContent());
                }
                writer.newLine();
            }
        }

text.txt内容:

 

 输出内容:

 

posted on 2019-09-04 23:51  你不知道的浪漫  阅读(1057)  评论(0)  编辑  收藏  举报