实现图形化界面的代码如下：
import tkinter as tk
from tkinter import messagebox
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Load the tokenizer and the classifier from the same pretrained checkpoint.
_CHECKPOINT = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
# Prediction function
def predict_fake(title, text):
    """Score a news item with the module-level ``tokenizer`` and ``model``.

    Args:
        title: Headline of the news item.
        text: Body of the news item.

    Returns:
        A dict with the keys ``"Fake"`` and ``"Real"``, mapped in that order
        to the softmax probabilities of the model's output logits for the
        single encoded input.
    """
    # Title and body are joined with literal <title>/<content>/<end> markers.
    input_str = "<title>" + title + "<content>" + text + "<end>"
    # Pad or truncate to 512 tokens and return PyTorch tensors.
    encoded = tokenizer.encode_plus(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )
    # Name the softmax dimension explicitly: torch.nn.Softmax() without `dim`
    # relies on a deprecated implicit choice and emits a warning on each call.
    # The last dimension is assumed to be the class dimension of the logits.
    probabilities = torch.softmax(output.logits, dim=-1)[0].tolist()
    return dict(zip(["Fake", "Real"], probabilities))
# Text preprocessing
def preprocess_text(text):
    """Return *text* with double quotes and newlines normalised.

    Every ``"`` becomes ``'`` and every newline character becomes a single
    space; all other characters are left untouched.
    """
    # One translation pass handles both single-character substitutions.
    substitutions = str.maketrans({'"': "'", '\n': ' '})
    return text.translate(substitutions)
# Translate Chinese text into English
def translate_to_english(text):
    """Translate *text* from Simplified Chinese to English via googletrans.

    Args:
        text: The text to translate.

    Returns:
        The translated text, or the original *text* unchanged when the
        translation fails for any reason (the failure is printed).
    """
    import asyncio
    import inspect

    translator = Translator()
    try:
        translated = translator.translate(text, src='zh-cn', dest='en')
        # Newer googletrans releases expose translate() as a coroutine
        # function; without running the coroutine, `.text` would raise and the
        # handler below would silently return the untranslated input.
        if inspect.iscoroutine(translated):
            translated = asyncio.run(translated)
        return translated.text
    except Exception as e:
        # Deliberate best effort: fall back to the untranslated text.
        print(f"Translation failed: {e}")
        return text
# Handle the form input and display the result
def handle_input():
    """Read the title and content fields, classify them and show the verdict.

    Shows an error dialog and returns early when either field is empty.
    Otherwise the text is translated to English if it contains Chinese
    characters, preprocessed, passed to ``predict_fake``, and the outcome is
    written to ``result_label``.
    """
    # Strip both fields so that whitespace-only input counts as empty; the
    # title was previously left unstripped, unlike the content.
    title = title_entry.get().strip()
    content = content_entry.get("1.0", tk.END).strip()
    if not title or not content:
        messagebox.showerror("Input Error", "Title and Content cannot be empty!")
        return
    # Translate when any character falls in the U+4E00..U+9FFF range
    # (CJK Unified Ideographs), i.e. the text contains Chinese.
    if any('\u4e00' <= ch <= '\u9fff' for ch in title + content):
        content = translate_to_english(content)
        title = translate_to_english(title)
    # Normalise quotes and newlines before building the model input.
    title = preprocess_text(title)
    content = preprocess_text(content)
    # Run the model.
    result = predict_fake(title, content)
    # "Real" wins ties because the comparison is strict.
    verdict = 'Fake' if result['Fake'] > result['Real'] else 'Real'
    result_label.config(
        text=f"Prediction: {verdict}\nFake: {result['Fake']:.2f}\nReal: {result['Real']:.2f}"
    )
# Build the main application window.
root = tk.Tk()
root.title("Fake News Detection")

# Padding shared by every widget except the button.
_FIELD_PAD = {"padx": 10, "pady": 5}

# Single-line entry for the headline.
title_caption = tk.Label(root, text="Title:")
title_caption.pack(**_FIELD_PAD)
title_entry = tk.Entry(root, width=50)
title_entry.pack(**_FIELD_PAD)

# Multi-line text box for the article body.
content_caption = tk.Label(root, text="Content:")
content_caption.pack(**_FIELD_PAD)
content_entry = tk.Text(root, width=50, height=10)
content_entry.pack(**_FIELD_PAD)

# Button that triggers the prediction.
predict_button = tk.Button(root, text="Predict", command=handle_input)
predict_button.pack(padx=10, pady=20)

# Label in which the prediction is displayed.
result_label = tk.Label(root, text="Prediction: ", font=("Arial", 12))
result_label.pack(**_FIELD_PAD)

# Start the Tk event loop.
root.mainloop()
展示：程序能够把新闻中的双引号改为单引号，并把换行符替换为空格；还能够把中文新闻翻译为英文新闻。但是由于训练数据集的原因，无法实现对中文新闻的精准预测。