jsoup解析本地html网页到本地——Document、Element、select应用

  • 对于以下网页:

  <!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Pizza Menu</title>
    <style>
      body {
        font-family: Arial, sans-serif;
        background-color: #f8f8f8;
        color: #333;
        margin: 0;
        padding: 20px;
      }
      h1 {
        text-align: center;
        color: #e74c3c;
      }
      .pizza-menu {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-around;
      }
      .pizza {
        background-color: #fff;
        border: 2px solid #ccc;
        border-radius: 10px;
        width: 300px;
        margin: 20px;
        padding: 15px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
      }
      .pizza h2 {
        color: #e74c3c;
      }
      .pizza p {
        font-size: 14px;
      }
      .price {
        font-weight: bold;
        font-size: 18px;
        color: #27ae60;
      }
      .features {
        font-style: italic;
        color: #7f8c8d;
      }

      .french-fries {
        background-color: #f9f7f0;
        border: 2px solid #f1c40f;
        border-radius: 10px;
        width: 300px;
        margin: 20px;
        padding: 15px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
      }
      .french-fries h2 {
        color: #d35400;
      }
      .french-fries p {
        font-size: 14px;
      }
      .french-fries .price {
        font-weight: bold;
        font-size: 18px;
        color: #e67e22;
      }
      .french-fries .features {
        font-style: italic;
        color: #7f8c8d;
      }

      .fried-chicken {
        background-color: #f8f8f8;
        border: 2px solid #ddd;
        border-radius: 10px;
        width: 300px;
        margin: 20px;
        padding: 15px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
      }
      .fried-chicken h2 {
        color: #114514;
      }
      .fried-chicken p {
        font-size: 14px;
      }

      .drink {
        background-color: #f8f8f8;
        border: 2px solid #ddd;
        border-radius: 10px;
        width: 300px;
        margin: 20px;
        padding: 15px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
      }
      .drink h2 {
        color: #233333;
      }
      .drink p {
        font-size: 14px;
      }
    </style>
  </head>
  <body>
    <h1>Garfield Restaurant Menu</h1>
    <h2>Pizzas</h2>
    <br />

    <div class="pizza-menu">
      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Margherita%20Pizza.jpeg"
          alt="Margherita Pizza"
          style="width: 100%"
        />
        <h2>Margherita Pizza</h2>
        <p>Weight: 500g</p>
        <p>Calories: 1200 kcal</p>
        <p>Radius: 9 inches</p>
        <p class="price">$8.99</p>
        <p class="features">
          Features: Classic Italian pizza with tomato sauce, mozzarella, and
          fresh basil.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Pepperoni%20Pizza.jpeg"
          alt="Pepperoni Pizza"
          style="width: 100%"
        />
        <h2>Pepperoni Pizza</h2>
        <p>Weight: 600g</p>
        <p>Calories: 1400 kcal</p>
        <p>Radius: 12 inches</p>
        <p class="price">$10.50</p>
        <p class="features">
          Features: Loaded with pepperoni slices, mozzarella cheese, and tomato
          sauce.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Veggie%20Delight%20Pizza.jpeg"
          alt="Veggie Delight Pizza"
          style="width: 100%"
        />
        <h2>Veggie Delight Pizza</h2>
        <p>Weight: 550g</p>
        <p>Calories: 1100 kcal</p>
        <p>Radius: 6 inches</p>
        <p class="price">$9.50</p>
        <p class="features">
          Features: Topped with fresh vegetables like bell peppers, onions,
          olives, and tomatoes.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/BBQ%20Chicken%20Pizza.jpeg"
          alt="BBQ Chicken Pizza"
          style="width: 100%"
        />
        <h2>BBQ Chicken Pizza</h2>
        <p>Weight: 650g</p>
        <p>Calories: 1500 kcal</p>
        <p>Radius: 9 inches</p>
        <p class="price">$11.75</p>
        <p class="features">
          Features: Smoked BBQ chicken with tangy BBQ sauce, red onions, and
          mozzarella.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Hawaiian%20Pizza.jpeg"
          alt="Hawaiian Pizza"
          style="width: 100%"
        />
        <h2>Hawaiian Pizza</h2>
        <p>Weight: 620g</p>
        <p>Calories: 1350 kcal</p>
        <p>Radius: 12 inches</p>
        <p class="price">$10.25</p>
        <p class="features">
          Features: A sweet and savory combo of ham, pineapple, and mozzarella
          cheese.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Four%20Cheese%20Pizza.jpeg"
          alt="Four Cheese Pizza"
          style="width: 100%"
        />
        <h2>Four Cheese Pizza</h2>
        <p>Weight: 580g</p>
        <p>Calories: 1300 kcal</p>
        <p>Radius: 6 inches</p>
        <p class="price">$9.99</p>
        <p class="features">
          Features: A rich blend of mozzarella, parmesan, cheddar, and
          gorgonzola cheeses, with a touch of garlic and olive oil.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Spicy%20Sausage%20Pizza.jpeg"
          alt="Spicy Sausage Pizza"
          style="width: 100%"
        />
        <h2>Spicy Sausage Pizza</h2>
        <p>Weight: 630g</p>
        <p>Calories: 1450 kcal</p>
        <p>Radius: 9 inches</p>
        <p class="price">$11.00</p>
        <p class="features">
          Features: Topped with spicy Italian sausage, red chili flakes, onions,
          and mozzarella cheese.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Seafood%20Pizza.jpeg"
          alt="Seafood Pizza"
          style="width: 100%"
        />
        <h2>Seafood Pizza</h2>
        <p>Weight: 670g</p>
        <p>Calories: 1600 kcal</p>
        <p>Radius: 12 inches</p>
        <p class="price">$12.50</p>
        <p class="features">
          Features: A delightful mix of shrimp, calamari, and mussels, with
          garlic-pepper sauce and mozzarella.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Buffalo%20Chicken%20Pizza.jpeg"
          alt="Buffalo Chicken Pizza"
          style="width: 100%"
        />
        <h2>Buffalo Chicken Pizza</h2>
        <p>Weight: 640g</p>
        <p>Calories: 1550 kcal</p>
        <p>Radius: 6 inches</p>
        <p class="price">$11.25</p>
        <p class="features">
          Features: Spicy buffalo chicken, blue cheese crumbles, red onions, and
          a drizzle of ranch dressing.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Truffle%20Mushroom%20Pizza.jpeg"
          alt="Truffle Mushroom Pizza"
          style="width: 100%"
        />
        <h2>Truffle Mushroom Pizza</h2>
        <p>Weight: 600g</p>
        <p>Calories: 1400 kcal</p>
        <p>Radius: 9 inches</p>
        <p class="price">$12.75</p>
        <p class="features">
          Features: Creamy truffle sauce, wild mushrooms, mozzarella, and a
          sprinkle of fresh arugula.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Meat%20Lover's%20Pizza.jpeg"
          alt="Meat Lover's Pizza"
          style="width: 100%"
        />
        <h2>Meat Lover's Pizza</h2>
        <p>Weight: 680g</p>
        <p>Calories: 1700 kcal</p>
        <p>Radius: 12 inches</p>
        <p class="price">$12.99</p>
        <p class="features">
          Features: Packed with pepperoni, sausage, ham, bacon, and mozzarella
          cheese.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Greek%20Pizza.jpeg"
          alt="Greek Pizza"
          style="width: 100%"
        />
        <h2>Greek Pizza</h2>
        <p>Weight: 570g</p>
        <p>Calories: 1250 kcal</p>
        <p>Radius: 6 inches</p>
        <p class="price">$10.75</p>
        <p class="features">
          Features: Feta cheese, black olives, red onions, tomatoes, and spinach
          on a garlic-infused crust.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Pesto%20Chicken%20Pizza.jpeg"
          alt="Pesto Chicken Pizza"
          style="width: 100%"
        />
        <h2>Pesto Chicken Pizza</h2>
        <p>Weight: 590g</p>
        <p>Calories: 1350 kcal</p>
        <p>Radius: 9 inches</p>
        <p class="price">$11.50</p>
        <p class="features">
          Features: Grilled chicken, basil pesto sauce, mozzarella, and
          sun-dried tomatoes.
        </p>
      </div>

      <div class="pizza">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Smoked%20Salmon%20Pizza.jpeg"
          alt="Smoked Salmon Pizza"
          style="width: 100%"
        />
        <h2>Smoked Salmon Pizza</h2>
        <p>Weight: 580g</p>
        <p>Calories: 1400 kcal</p>
        <p>Radius: 12 inches</p>
        <p class="price">$13.75</p>
        <p class="features">
          Features: Smoked salmon, cream cheese, red onions, capers, and fresh
          dill on a thin crispy crust.
        </p>
      </div>
    </div>

    <h2>French Fries</h2>

    <div class="pizza-menu">
      <div class="french-fries">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Classic%20French%20Fries.jpeg"
          alt="Classic French Fries"
          style="width: 100%"
        />
        <h2>Classic French Fries</h2>
        <p>Weight: 200g</p>
        <p>Calories: 300 kcal</p>
        <p>Thickness: Regular</p>
        <p class="price">$3.50</p>
        <p class="features">
          Features: Crispy golden fries, lightly salted and served with ketchup.
        </p>
      </div>

      <div class="french-fries">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Cheese%20Fries.jpeg"
          alt="Cheese Fries"
          style="width: 100%"
        />
        <h2>Cheese Fries</h2>
        <p>Weight: 250g</p>
        <p>Calories: 450 kcal</p>
        <p>Thickness: Thick</p>
        <p class="price">$4.50</p>
        <p class="features">
          Features: Hot crispy fries smothered in melted cheddar cheese.
        </p>
      </div>

      <div class="french-fries">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Sweet%20Potato%20Fries.jpeg"
          alt="Sweet Potato Fries"
          style="width: 100%"
        />
        <h2>Sweet Potato Fries</h2>
        <p>Weight: 220g</p>
        <p>Calories: 350 kcal</p>
        <p>Thickness: Thin</p>
        <p class="price">$4.00</p>
        <p class="features">
          Features: Crispy sweet potato fries with a dash of sea salt, served
          with a side of spicy mayo.
        </p>
      </div>
    </div>

    <h2>Fried Chicken</h2>

    <div class="pizza-menu">
      <div class="fried-chicken">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Classic%20Fried%20Chicken.jpeg"
          alt="Classic Fried Chicken"
          style="width: 100%"
        />
        <h2>Classic Fried Chicken</h2>
        <p>Weight: 800g</p>
        <p>Calories: 1800 kcal</p>
        <p>Spiciness: Mild</p>
        <p class="price">$12.99</p>
        <p class="features">
          Features: Crispy golden-brown crust with juicy, tender chicken inside.
        </p>
      </div>

      <div class="fried-chicken">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Spicy%20Buffalo%20Chicken.jpeg"
          alt="Spicy Buffalo Chicken"
          style="width: 100%"
        />
        <h2>Spicy Buffalo Chicken</h2>
        <p>Weight: 750g</p>
        <p>Calories: 1700 kcal</p>
        <p>Spiciness: Medium</p>
        <p class="price">$13.50</p>
        <p class="features">
          Features: Coated in spicy buffalo sauce with a hint of garlic and
          butter.
        </p>
      </div>

      <div class="fried-chicken">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Honey%20Garlic%20Chicken.jpeg"
          alt="Honey Garlic Chicken"
          style="width: 100%"
        />
        <h2>Honey Garlic Chicken</h2>
        <p>Weight: 700g</p>
        <p>Calories: 1600 kcal</p>
        <p>Spiciness: Mild</p>
        <p class="price">$14.25</p>
        <p class="features">
          Features: Glazed with a sweet and savory honey garlic sauce.
        </p>
      </div>

      <div class="fried-chicken">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Korean%20Style%20Chicken.jpeg"
          alt="Korean Style Chicken"
          style="width: 100%"
        />
        <h2>Korean Style Chicken</h2>
        <p>Weight: 780g</p>
        <p>Calories: 1900 kcal</p>
        <p>Spiciness: Hot</p>
        <p class="price">$14.75</p>
        <p class="features">
          Features: Double-fried for extra crispiness, tossed in a tangy, spicy
          Korean sauce.
        </p>
      </div>

      <div class="fried-chicken">
        <img
          src="https://baokker-oss-blog-hangzhou.oss-cn-hangzhou.aliyuncs.com/food_imgs/Lemon%20Pepper%20Chicken.jpeg"
          alt="Lemon Pepper Chicken"
          style="width: 100%"
        />
        <h2>Lemon Pepper Chicken</h2>
        <p>Weight: 730g</p>
        <p>Calories: 1750 kcal</p>
        <p>Spiciness: Mild</p>
        <p class="price">$13.99</p>
        <p class="features">
          Features: Zesty lemon and cracked black pepper seasoning.
        </p>
      </div>
    </div>
  </body>
</html>

  • 假设我们已经创建好了对应的食物类(Pizza、FrenchFries、FriedChicken 等),现在要把网页中某一类食物的信息解析出来,并写入对应类的对象中。以 Pizza 为例:
/*
* FoodHtmlParser.java
*/

package com.example.service;

import com.example.model.*;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.Jsoup;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Extracts food items from a locally stored HTML menu page using jsoup.
 */
public class FoodHtmlParser {
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(FoodHtmlParser.class.getName());

    /**
     * Reads a local HTML file and converts every {@code div.pizza} card into a {@link Pizza}.
     *
     * <p>Parsing is best-effort: if the path is {@code null} or the file cannot be read, an
     * empty list is returned. A card that cannot be converted (no {@code <h2>} name, no
     * {@code p.price}, non-numeric calories or price) is logged and skipped, and the
     * remaining cards are still parsed.
     *
     * @param filePath path of the HTML file; it is decoded as UTF-8
     * @return the successfully parsed pizzas in document order, never {@code null}
     */
    public List<Pizza> parsePizzas(String filePath) {
        List<Pizza> pizzas = new ArrayList<>();
        if (filePath == null) {
            LOGGER.warning("Menu file path is null; no pizzas parsed");
            return pizzas;
        }

        Document doc;
        try {
            doc = Jsoup.parse(new File(filePath), "UTF-8");
        } catch (java.io.IOException e) {
            LOGGER.log(java.util.logging.Level.WARNING, "Cannot read menu file: " + filePath, e);
            return pizzas;
        }

        for (Element pizzaDiv : doc.select("div.pizza")) {
            try {
                pizzas.add(toPizza(pizzaDiv));
            } catch (RuntimeException e) {
                // Keep the method's no-throw, best-effort contract: one bad card must not
                // discard the cards that follow it.
                LOGGER.log(java.util.logging.Level.WARNING,
                        "Skipping malformed pizza card: " + e.getMessage(), e);
            }
        }
        return pizzas;
    }

    /**
     * Builds a {@link Pizza} from one {@code div.pizza} element.
     *
     * @throws IllegalArgumentException if the card has no name or price element, or if the
     *         calories or price text is not numeric ({@link NumberFormatException})
     */
    private static Pizza toPizza(Element card) {
        Element heading = card.selectFirst("h2");
        if (heading == null) {
            throw new IllegalArgumentException("card has no <h2> name element");
        }
        Element priceTag = card.selectFirst("p.price");
        if (priceTag == null) {
            throw new IllegalArgumentException(
                    "card \"" + heading.text() + "\" has no p.price element");
        }
        Element image = card.selectFirst("img");
        Element featuresTag = card.selectFirst("p.features");

        String caloriesStr = labeledValue(card, "Calories").replace("kcal", "").trim();
        String priceStr = priceTag.text().replace("$", "").trim();

        String features = featuresTag == null ? "" : featuresTag.text();
        // Strip only the leading label; the description itself is left untouched.
        if (features.startsWith("Features:")) {
            features = features.substring("Features:".length()).trim();
        }

        Pizza pizza = new Pizza();
        pizza.setName(heading.text());
        pizza.setWeight(labeledValue(card, "Weight"));
        pizza.setCalories(Integer.parseInt(caloriesStr));
        pizza.setRadius(labeledValue(card, "Radius"));
        pizza.setPrice(Double.parseDouble(priceStr));
        pizza.setFeatures(features);
        // A missing image is not fatal; fall back to an empty URL.
        pizza.setImgUrl(image == null ? "" : image.attr("src"));
        return pizza;
    }

    /**
     * Returns the text that follows {@code "<label>:"} in the first {@code <p>} of the card
     * whose text starts with that label, or an empty string if there is no such paragraph.
     *
     * <p>A prefix match is used instead of the {@code :contains(...)} selector because
     * {@code :contains} is a case-insensitive substring test over every paragraph, so a
     * description that merely mentions the label word would be matched as well.
     */
    private static String labeledValue(Element card, String label) {
        String prefix = label + ":";
        for (Element paragraph : card.select("p")) {
            String text = paragraph.text();
            if (text.startsWith(prefix)) {
                return text.substring(prefix.length()).trim();
            }
        }
        return "";
    }
}
  • 其余的食物类型同理(以下方法同样写在 FoodHtmlParser 类中):
    /**
     * Reads a local HTML file and converts every {@code div.french-fries} card into a
     * {@link FrenchFries}.
     *
     * <p>The labelled paragraphs ("Weight:", "Calories:", "Thickness:") are located by an exact
     * prefix match on each {@code <p>}. The {@code :contains(...)} selector is avoided because it
     * is a case-insensitive substring test, so a description that merely mentions one of those
     * words would be matched too and corrupt the value.
     *
     * @param filePath path of the HTML file; it is decoded as UTF-8
     * @return the parsed items in document order, never {@code null}
     * @throws java.io.IOException if the file cannot be read
     * @throws IllegalArgumentException if a card has no {@code <h2>} name, or if its calories or
     *         price text is not numeric ({@link NumberFormatException})
     */
    public List<FrenchFries> parseFrenchFriess(String filePath) throws java.io.IOException {
        List<FrenchFries> frenchFriesList = new ArrayList<>();
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");

        for (Element friesDiv : doc.select("div.french-fries")) {
            Element heading = friesDiv.selectFirst("h2");
            if (heading == null) {
                throw new IllegalArgumentException("french-fries card has no <h2> name element");
            }
            Element image = friesDiv.selectFirst("img");

            String weight = "";
            String caloriesStr = "";
            String thickness = "";
            for (Element paragraph : friesDiv.select("p")) {
                String text = paragraph.text();
                if (text.startsWith("Weight:")) {
                    weight = text.substring("Weight:".length()).trim();
                } else if (text.startsWith("Calories:")) {
                    caloriesStr = text.substring("Calories:".length()).replace("kcal", "").trim();
                } else if (text.startsWith("Thickness:")) {
                    thickness = text.substring("Thickness:".length()).trim();
                }
            }

            String priceStr = friesDiv.select("p.price").text().replace("$", "").trim();

            String features = friesDiv.select("p.features").text();
            // Strip only the leading label; the description itself is left untouched.
            if (features.startsWith("Features:")) {
                features = features.substring("Features:".length()).trim();
            }

            FrenchFries frenchFries = new FrenchFries();
            frenchFries.setName(heading.text());
            frenchFries.setWeight(weight);
            frenchFries.setCalories(Integer.parseInt(caloriesStr));
            frenchFries.setThickness(thickness);
            frenchFries.setPrice(Double.parseDouble(priceStr));
            frenchFries.setFeatures(features);
            // A missing image is not fatal; fall back to an empty URL.
            frenchFries.setImgUrl(image == null ? "" : image.attr("src"));

            frenchFriesList.add(frenchFries);
        }
        return frenchFriesList;
    }

    /**
     * Reads a local HTML file and converts every {@code div.fried-chicken} card into a
     * {@link FriedChicken}.
     *
     * <p>The labelled paragraphs ("Weight:", "Calories:", "Spiciness:") are located by an exact
     * prefix match on each {@code <p>}. The {@code :contains(...)} selector is avoided because it
     * is a case-insensitive substring test, so a description that merely mentions one of those
     * words would be matched too and corrupt the value.
     *
     * @param filePath path of the HTML file; it is decoded as UTF-8
     * @return the parsed items in document order, never {@code null}
     * @throws java.io.IOException if the file cannot be read
     * @throws IllegalArgumentException if a card has no {@code <h2>} name, or if its calories or
     *         price text is not numeric ({@link NumberFormatException})
     */
    public List<FriedChicken> parseFriedChickens(String filePath) throws java.io.IOException {
        List<FriedChicken> friedChickenList = new ArrayList<>();
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");

        for (Element friedChickenDiv : doc.select("div.fried-chicken")) {
            Element heading = friedChickenDiv.selectFirst("h2");
            if (heading == null) {
                throw new IllegalArgumentException("fried-chicken card has no <h2> name element");
            }
            Element image = friedChickenDiv.selectFirst("img");

            String weight = "";
            String caloriesStr = "";
            String spiciness = "";
            for (Element paragraph : friedChickenDiv.select("p")) {
                String text = paragraph.text();
                if (text.startsWith("Weight:")) {
                    weight = text.substring("Weight:".length()).trim();
                } else if (text.startsWith("Calories:")) {
                    caloriesStr = text.substring("Calories:".length()).replace("kcal", "").trim();
                } else if (text.startsWith("Spiciness:")) {
                    spiciness = text.substring("Spiciness:".length()).trim();
                }
            }

            String priceStr = friedChickenDiv.select("p.price").text().replace("$", "").trim();

            String features = friedChickenDiv.select("p.features").text();
            // Strip only the leading label; the description itself is left untouched.
            if (features.startsWith("Features:")) {
                features = features.substring("Features:".length()).trim();
            }

            FriedChicken friedChicken = new FriedChicken();
            friedChicken.setName(heading.text());
            friedChicken.setWeight(weight);
            friedChicken.setCalories(Integer.parseInt(caloriesStr));
            friedChicken.setSpiciness(spiciness);
            friedChicken.setPrice(Double.parseDouble(priceStr));
            friedChicken.setFeatures(features);
            // A missing image is not fatal; fall back to an empty URL.
            friedChicken.setImgUrl(image == null ? "" : image.attr("src"));

            friedChickenList.add(friedChicken);
        }
        return friedChickenList;
    }
  • 注意: Jsoup.parse(File, String) 声明会抛出受检异常 IOException,调用它的方法必须用 try-catch 捕获该异常,或者在方法签名上声明 throws IOException,否则会导致编译错误。
posted @ 2025-10-22 16:59  妲丽安的书架  阅读(13)  评论(0)    收藏  举报