jsoup解析本地html网页到本地2
- 在之前的《jsoup解析本地html网页到本地》中,虽然可以实现不同食物的解析,但是为每种食物单独写一个完整的解析方法实在太麻烦了
- 注意到所有食物均属于Food的子类,且解析方法中有许多重复代码
- 可将各解析方法中都会用到的 Food food 和 Element foodDiv 作为通用方法的参数,把重复的解析逻辑提取为公用方法:
/**
 * Reads the fields shared by all food entries from one HTML element and stores them on
 * the given {@code Food} object.
 *
 * <p>The values come from the first {@code img} (its {@code src} attribute), the first
 * {@code h2}, and the paragraphs matched by {@code p:contains(Weight)},
 * {@code p:contains(Calories)}, {@code p.price} and {@code p.features}. Every value is
 * extracted and parsed before the first setter is called, so a failure while parsing
 * happens before {@code food} is modified.
 *
 * @param foodDiv element wrapping a single food entry
 * @param food    object that receives the parsed values
 * @throws NullPointerException  if {@code selectFirst} finds no {@code img} or no {@code h2}
 * @throws NumberFormatException if the calories or price text cannot be parsed as a number
 */
private void parseCommonFields(Element foodDiv, Food food) {
    Element image = Objects.requireNonNull(foodDiv.selectFirst("img"));
    String imageSrc = image.attr("src");

    Element heading = Objects.requireNonNull(foodDiv.selectFirst("h2"));
    String title = heading.text();

    String weightText = foodDiv.select("p:contains(Weight)").text();
    String weightValue = weightText.replace("Weight: ", "");

    // Strip the label and the unit so that only the integer remains.
    String caloriesText = foodDiv.select("p:contains(Calories)").text();
    String caloriesDigits = caloriesText.replace("Calories: ", "").replace("kcal", "").trim();
    int kcal = Integer.parseInt(caloriesDigits);

    // Drop the currency sign before converting the price.
    String priceText = foodDiv.select("p.price").text();
    double cost = Double.parseDouble(priceText.replace("$", ""));

    String featuresText = foodDiv.select("p.features").text();
    String featuresValue = featuresText.replace("Features: ", "");

    food.setName(title);
    food.setWeight(weightValue);
    food.setCalories(kcal);
    food.setPrice(cost);
    food.setFeatures(featuresValue);
    food.setImgUrl(imageSrc);
}
然后,每个食物只需要调用公共方法,并提取自己的特有部分即可:
- Pizzas:
/**
 * Parses every {@code div.pizza} element of a local HTML file into {@code Pizza} objects.
 *
 * <p>The file is read as UTF-8. The shared fields are filled in by
 * {@code parseCommonFields}; this method only adds the pizza-specific radius.
 *
 * <p>Any exception raised while reading or parsing is caught here: a message is printed
 * to standard output and the pizzas collected up to that point are returned.
 * NOTE(review): {@code parseFrenchFriess} and {@code parseFriedChickens} rethrow read
 * failures instead of swallowing them - consider aligning the three parsers.
 *
 * @param filePath path of the HTML file to parse
 * @return the parsed pizzas; possibly empty or incomplete if parsing failed
 */
public List<Pizza> parsePizzas(String filePath) {
    List<Pizza> result = new ArrayList<>();
    try {
        File htmlFile = new File(filePath);
        Document page = Jsoup.parse(htmlFile, "UTF-8");
        for (Element card : page.select("div.pizza")) {
            Pizza item = new Pizza();
            // Shared fields first (name, weight, calories, price, features, image).
            parseCommonFields(card, item);
            // Then the field that only pizzas have.
            String radiusText = card.select("p:contains(Radius)").text();
            item.setRadius(radiusText.replace("Radius: ", ""));
            result.add(item);
        }
    } catch (Exception ex) {
        System.out.println("解析Pizza数据失败" + ex.getMessage());
    }
    return result;
}
- FrenchFries:
/**
 * Parses every {@code div.french-fries} element of a local HTML file into
 * {@code FrenchFries} objects.
 *
 * <p>The file is read as UTF-8. The shared fields are filled in by
 * {@code parseCommonFields}; this method only adds the fries-specific thickness.
 *
 * @param filePath path of the HTML file to parse
 * @return the parsed french fries, in document order
 * @throws java.io.UncheckedIOException if the file cannot be read; the original
 *         {@code IOException} is kept as the cause
 */
public List<FrenchFries> parseFrenchFriess(String filePath) {
    List<FrenchFries> frenchFriesList = new ArrayList<>();
    try {
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");
        for (Element friesDiv : doc.select("div.french-fries")) {
            FrenchFries frenchFries = new FrenchFries();
            // Shared fields first (name, weight, calories, price, features, image).
            parseCommonFields(friesDiv, frenchFries);
            // Then the field that only french fries have.
            String thickness = friesDiv.select("p:contains(Thickness)").text().replace("Thickness: ", "");
            frenchFries.setThickness(thickness);
            frenchFriesList.add(frenchFries);
        }
    } catch (IOException e) {
        // Still unchecked (UncheckedIOException extends RuntimeException), but now the
        // failure names the file and uses the type meant for wrapped I/O errors.
        throw new java.io.UncheckedIOException("解析FrenchFries数据失败: " + filePath, e);
    }
    return frenchFriesList;
}
- FriedChicken:
/**
 * Parses every {@code div.fried-chicken} element of a local HTML file into
 * {@code FriedChicken} objects.
 *
 * <p>The file is read as UTF-8. The shared fields are filled in by
 * {@code parseCommonFields}; this method only adds the chicken-specific spiciness.
 *
 * @param filePath path of the HTML file to parse
 * @return the parsed fried chicken entries, in document order
 * @throws java.io.UncheckedIOException if the file cannot be read; the original
 *         {@code IOException} is kept as the cause
 */
public List<FriedChicken> parseFriedChickens(String filePath) {
    List<FriedChicken> friedChickenList = new ArrayList<>();
    try {
        Document doc = Jsoup.parse(new File(filePath), "UTF-8");
        for (Element friedChickenDiv : doc.select("div.fried-chicken")) {
            FriedChicken friedChicken = new FriedChicken();
            // Shared fields first (name, weight, calories, price, features, image).
            parseCommonFields(friedChickenDiv, friedChicken);
            // Then the field that only fried chicken has.
            String spiciness = friedChickenDiv.select("p:contains(Spiciness)").text().replace("Spiciness: ", "");
            friedChicken.setSpiciness(spiciness);
            friedChickenList.add(friedChicken);
        }
    } catch (IOException e) {
        // Still unchecked (UncheckedIOException extends RuntimeException), but now the
        // failure names the file and uses the type meant for wrapped I/O errors.
        throw new java.io.UncheckedIOException("解析FriedChicken数据失败: " + filePath, e);
    }
    return friedChickenList;
}

浙公网安备 33010602011771号