03_Jsoup

【01.获取一个页面所有的链接】

/**
 * Fetches a blog page and lists every hyperlink found on it.
 *
 * <p>The parsed document is printed to standard output first, followed by the number of
 * {@code <a>} elements that carry an {@code href} attribute. Each of those links is then
 * written to standard error as an anchor tag showing its absolute URL and its text.
 *
 * @param args command-line arguments (unused)
 * @throws IOException propagated from the HTTP fetch of the page
 */
public static void main(String[] args) throws IOException {
    final String pageUrl = "http://www.cnblogs.com/HigginCui/";
    // Download the page and parse it into an HTML DOM.
    final Document page = Jsoup.connect(pageUrl).get();
    System.out.println(page);   // dumps the entire HTML page

    // Keep only anchors that actually have an href attribute.
    final Elements anchors = page.select("a[href]");
    System.out.println(anchors.size());

    for (int i = 0; i < anchors.size(); i++) {
        final Element anchor = anchors.get(i);
        // The "abs:" prefix asks jsoup for the URL resolved against the document's base URI.
        final String absoluteHref = anchor.attr("abs:href");
        final String label = anchor.text();
        System.err.println("<a href=\"" + absoluteHref + "\"> (" + label + ") </a>");
    }
}

【运行结果】

 

【02】

/**
 * Parses a small HTML fragment and prints the text, the {@code href} attribute and the
 * outer HTML of its first {@code <a>} element.
 */
@Test
public void test01() {
    final String markup = "<p> "
            + "    <a href='http://example.com/'>"
            + "        <b>霸气</b>"
            + "    </a> "
            + "    link."
            + "</p>";
    final Document parsed = Jsoup.parse(markup);
    // select() returns every <a>; first() keeps only the first match.
    final Element anchor = parsed.select("a").first();

    final String anchorText = anchor.text();            // text content of the element
    final String anchorHref = anchor.attr("href");
    final String anchorOuterHtml = anchor.outerHtml();

    System.out.println("ele.text()===" + anchorText);
    System.out.println("ele.attr(\"href\")===" + anchorHref);
    System.out.println("ele.outerHtml()===" + anchorOuterHtml);
}

【运行结果】

 

【03.根据id获取对应的Element】

/**
 * Looks up an element by its id and prints its {@code value} attribute followed by its text.
 */
@Test
public void test02() {
    final String markup = "<p id=\"ppp\" value=\"i am best!\">哈哈哈  </p>";
    final Document parsed = Jsoup.parse(markup);
    // "#ppp" is an id selector; first() takes the first element that matches it.
    final Element target = parsed.select("#ppp").first();

    final String valueAttribute = target.attr("value");
    final String content = target.text();

    System.out.println(valueAttribute);
    System.out.println(content);
}

【运行结果】

 

【04】

/**
 * Parses a two-row HTML table and prints the own text of the first three cells of every row,
 * one cell per line.
 */
@Test
public void test03() {
    final String markup = "<div id=\"zxSale\">"
            + "    <table class=\"sssss\">"
            + "        <tbody>"
            + "            <tr value=\"tttttrrrrr\">"
            + "                <td>2017-02-22</td>"
            + "                <td> 富国基金</td>"
            + "                <td>嘉实基金</td>"
            + "            </tr>"
            + "            <tr value=\"tttttrrrrr\">"
            + "                <td>2017-03-22</td>"
            + "                <td>建信基金</td>"
            + "                <td>易方达基金</td>"
            + "            </tr>"
            + "        </tbody>"
            + "    </table>"
            + "</div>";
    final Document parsed = Jsoup.parse(markup);
    // Child combinators walk from the wrapper div down to the table rows.
    final Elements rows = parsed.select("#zxSale > table > tbody > tr");

    // Each row in the sample holds exactly three cells (indexes 0..2); asking for
    // index 3 would fail because that cell does not exist.
    final int columnCount = 3;
    for (int r = 0; r < rows.size(); r++) {
        final Elements cells = rows.get(r).getElementsByTag("td");
        for (int c = 0; c < columnCount; c++) {
            System.out.println(cells.get(c).ownText());
        }
    }
}

【运行结果】

【05】

/**
 * Parses a two-row HTML table and prints the text of every cell in every row, one cell per
 * line, without assuming how many cells a row has.
 */
@Test
public void test03_plus() {
    final String markup = "<div id=\"zxSale\">"
            + "    <table class=\"sssss\">"
            + "        <tbody>"
            + "            <tr value=\"tttttrrrrr\">"
            + "                <td>2017-02-22</td>"
            + "                <td> 富国基金</td>"
            + "                <td>嘉实基金</td>"
            + "            </tr>"
            + "            <tr value=\"tttttrrrrr\">"
            + "                <td>2017-03-22</td>"
            + "                <td>建信基金</td>"
            + "                <td>易方达基金</td>"
            + "            </tr>"
            + "        </tbody>"
            + "    </table>"
            + "</div>";
    final Document parsed = Jsoup.parse(markup);
    final Elements rows = parsed.select("#zxSale > table > tbody > tr");

    for (int r = 0; r < rows.size(); r++) {
        // Collect the cells of this row by tag name.
        final Elements cells = rows.get(r).getElementsByTag("td");
        for (int c = 0; c < cells.size(); c++) {
            final Element cell = cells.get(c);
            System.out.println(cell.text());
        }
    }
}

【运行结果】

 

【06】

/**
 * Parses a {@code <select name="fundCode">} drop-down and prints the {@code value}
 * attribute and own text of each of its options.
 *
 * <p>Nothing is printed unless the drop-down holds more than one option.
 */
@Test
public void test04() {
    final String markup = "<div id=\"zxSale\">"
            + "    <div>"
            + "        <select name=\"fundCode\">"
            + "            <option value=\"\">全部</option>"
            + "            <option value=\"000001\">华夏001</option>"
            + "            <option value=\"000002\">华夏002</option>"
            + "            <option value=\"000003\">华夏003</option>"
            + "            <option value=\"000004\">华夏004</option>"
            + "            <option value=\"000005\">华夏005</option>"
            + "            <option value=\"000006\">华夏006</option>"
            + "        </select>"
            + "    </div>"
            + "</div>";
    final Document parsed = Jsoup.parse(markup);
    // Attribute selector: the first <select> whose name is "fundCode".
    final Element fundSelect = parsed.select("select[name=fundCode]").get(0);
    final Elements choices = fundSelect.getElementsByTag("option");

    // A list with zero or one option produces no output.
    if (choices.size() <= 1) {
        return;
    }
    for (int i = 0; i < choices.size(); i++) {
        final Element choice = choices.get(i);
        System.out.println("value===" + choice.attr("value"));
        System.out.println("owntext===" + choice.ownText());
    }
}

【运行结果】

posted @ 2017-03-01 14:11  HigginCui  阅读(572)  评论(0)  编辑  收藏  举报