tiktoken计算chatgpt-token
强迫症表示必须要看到 token 数量,于是拿 GPT 写了一个小工具玩玩。
核心代码:
import tiktoken
# Resolve the tokenizer that tiktoken associates with the "gpt-4" model name.
enc = tiktoken.encoding_for_model("gpt-4")
# Encode a sample string and print the resulting token ids.
sample_tokens = enc.encode("hello world!")
print(sample_tokens)
import tkinter as tk
import pyperclip
import tiktoken
class TokenCounter:
    """Small Tk window that shows the token count of the clipboard text.

    The count is refreshed when the button is pressed and whenever the
    window itself gains focus. One line per entry of ``token_encoders``
    is written to the text box.

    Note: the constructor runs the Tk main loop, so ``TokenCounter()``
    blocks until the window is closed.
    """

    def __init__(self):
        """Build the window, load the tokenizers and enter the Tk main loop."""
        # GUI setup
        self.root = tk.Tk()
        self.root.geometry("300x200")
        self.root.title("Token Counter")
        self.btn = tk.Button(
            self.root, text="Get Token Count", command=self.count_tokens
        )
        self.btn.pack(pady=10, padx=10)
        self.textbox = tk.Text(self.root, height=2)
        self.textbox.pack(pady=10, padx=10)
        # Tokenizer setup: display label -> tiktoken encoding
        self.token_encoders = {
            "gpt-4": tiktoken.encoding_for_model("gpt-4"),
            "gpt-3.5": tiktoken.encoding_for_model("gpt-3.5-turbo"),
        }
        # Recount whenever the window gains focus
        self.root.bind("<FocusIn>", self._on_focus_in)
        # Start the event loop (blocks until the window is closed)
        self.root.mainloop()

    def _on_focus_in(self, event):
        """Recount only for focus events that target the window itself."""
        # A binding on the toplevel is also triggered for every child widget
        # (the toplevel is part of each child's bindtags). Without this check
        # the clipboard would be re-read each time focus moves to the text box.
        if event.widget is self.root:
            self.count_tokens()

    @staticmethod
    def _format_counts(encoders, message):
        """Return one ``"<label>: <count>\\n"`` line per encoder for *message*.

        ``disallowed_special=()`` makes tiktoken encode text such as
        ``<|endoftext|>`` as ordinary characters; with the library default
        such clipboard content raises ``ValueError``.
        """
        return "".join(
            f"{label}: {len(encoder.encode(message, disallowed_special=()))}\n"
            for label, encoder in encoders.items()
        )

    def _show(self, text):
        """Replace the whole content of the text box with *text*."""
        self.textbox.delete("1.0", tk.END)
        self.textbox.insert(tk.END, text)

    def count_tokens(self):
        """Read the clipboard and display its token count for every model."""
        try:
            message = pyperclip.paste()
        except pyperclip.PyperclipException as exc:
            # Raised e.g. when no clipboard backend is available; report it in
            # the window instead of failing inside the Tk callback.
            self._show(f"Clipboard unavailable: {exc}")
            return
        if not message:
            self._show("Clipboard is empty!")
            return
        self._show(self._format_counts(self.token_encoders, message))
def main():
    """Create the TokenCounter, whose constructor runs the Tk main loop."""
    TokenCounter()


if __name__ == "__main__":
    main()
双击运行后会弹出一个小窗口;点击按钮或窗口获得焦点时,会自动读取剪贴板中的文字并计算 token 数。
(gpt-3.5-turbo 和 gpt-4 使用同一个编码 cl100k_base,所以计算出的 token 数是一样的)
"库"占一个token,"啊"占两个token,😥


                
            
        
浙公网安备 33010602011771号