
Building a Company-Level ChatGPT (Business Q&A)

1 Setting Up the Conversation Service Platform

Reference project: gradio-app/gradio: Create UIs for your machine learning model in Python in 3 minutes (github.com)

Reference link: https://www.zhihu.com/question/454990715 (a Zhihu thread that ranks the difficulty levels of the various Python UI frameworks)

From that guidance you can reach Gradio's complete documentation:

  https://gradio.app/

1.1 Features

Features: Build Machine Learning Web Apps — in Python (Gradio's own tagline).

1.2 Purpose and Target Users

Purpose: wrap a model or function in an interactive web app, so that users can try it in a browser without writing any code.

1.3 Getting Started

A three-step introduction is the easiest way to get people up to speed.

Installation is simply pip install gradio; there is no need to git clone the repository.

1) It can handle rich inputs and outputs such as images and audio (see the sketch below).

2) It can generate all kinds of UI components, such as buttons.

3) It can host a variety of tasks, such as a chatbot.
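
To illustrate point 1, here is a minimal sketch of an image-in/image-out app, assuming Gradio 3.x; it is not part of the original project, and the grayscale function is a placeholder of my own:

import gradio as gr

def to_grayscale(img):
    # With type="pil", Gradio hands the function a PIL.Image object
    return img.convert("L")

demo = gr.Interface(fn=to_grayscale,
                    inputs=gr.Image(type="pil"),
                    outputs=gr.Image(type="pil"))
demo.launch()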

1.4 Chat

Component: gr.Chatbot

Link: Creating A Chatbot (gradio.app)

Project location: /home/arm/disk_arm_8T/xiaoliu/AI610-SDK-r1p0-00eac0/GPT2_chinese_chat/GPT2-chitchat/interact_gpt2_gardio.py

If a port cannot be opened, it is probably already in use; find and kill the occupying process first:

sudo lsof -i:<port>

kill -9 <PID>
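
For example, if port 7861 is stuck (the PID below is made up for illustration):

sudo lsof -i:7861   # suppose the output lists a python process with PID 12345
kill -9 12345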

Code:

import argparse
import logging
import os
from datetime import datetime

import torch
import torch.nn.functional as F
import gradio as gr
from transformers import BertTokenizerFast, GPT2LMHeadModel


def set_args():
    """
    Set up the command-line arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='0', type=str, required=False, help='GPU id used for generation')
    parser.add_argument('--temperature', default=1, type=float, required=False, help='sampling temperature')
    parser.add_argument('--topk', default=8, type=int, required=False, help='sample from the k most likely tokens')
    parser.add_argument('--topp', default=0, type=float, required=False, help='nucleus (top-p) cumulative-probability threshold')
    parser.add_argument('--log_path', default='data/interact.log', type=str, required=False, help='where the interaction log is stored')
    parser.add_argument('--vocab_path', default='vocab/vocab.txt', type=str, required=False, help='vocabulary file')
    # Alternative checkpoints:
    # parser.add_argument('--model_path',
    #                     default='/home/arm/disk_arm_8T/xiaoliu/AI610-SDK-r1p0-00eac0/GPT2_chinese_chat/GPT2-chitchat-mmi/mmi_model/model_epoch30', type=str, required=False, help='dialogue model path')
    # parser.add_argument('--model_path', default='model/gpt3_bashe_epoch30', type=str, required=False, help='dialogue model path')
    parser.add_argument('--model_path', default='/home/arm/disk_arm_8T/xiaoliu/AI610-SDK-r1p0-00eac0/GPT2_chinese_chat/large-chat-GPT2/firefly-2b6', type=str, required=False, help='dialogue model path')
    parser.add_argument('--save_samples_path', default="sample/", type=str, required=False, help="directory where chat transcripts are saved")
    parser.add_argument('--repetition_penalty', default=1.0, type=float, required=False,
                        help="repetition penalty; raise it if the generated replies repeat themselves")
    parser.add_argument('--max_len', type=int, default=100, help='maximum length of each utterance; longer ones are truncated')
    parser.add_argument('--max_history_len', type=int, default=5, help="maximum number of utterances kept as dialogue history")
    parser.add_argument('--no_cuda', action='store_true', help='run inference without a GPU')
    parser.add_argument('--server', default='10.188.72.25', type=str, required=False, help='server address')
    parser.add_argument('--port', default='7861', type=str, required=False, help='server port')
    parser.add_argument('--concurrency_count', default=5, type=int, required=False, help='number of concurrent users')
    return parser.parse_args()


def create_logger(args):
    """
    Send log output to both a log file and the console.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s')

    # Handler that writes to the log file
    file_handler = logging.FileHandler(
        filename=args.log_path)
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.INFO)
    logger.addHandler(file_handler)

    # Handler that writes to the console
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(formatter)
    logger.addHandler(console)

    return logger


def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocab size)
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    assert logits.dim() == 1  # batch size 1 for now - could be updated for more but the code would be less clear
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k.
        # torch.topk() returns the top_k largest elements of the last dimension as (values, indices).
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value  # set every logit outside the top-k to -inf

    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)  # sort logits in descending order
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits


# Example invocation:
# python interact.py --device 0 --vocab_path vocab/vocab3.txt --model_path model/gpt3_bashe_epoch30 --max_history_len 20 --temperature 0.9
def main():
    args = set_args()
    logger = create_logger(args)
    # Use the GPU only when one is available and the user did not disable it
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    device = 'cuda:0' if args.cuda else 'cpu'
    logger.info('using device:{}'.format(device))
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    tokenizer = BertTokenizerFast(vocab_file=args.vocab_path, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
    model = GPT2LMHeadModel.from_pretrained(args.model_path, ignore_mismatched_sizes=True)

    model = model.to(device)
    model.eval()
    server_name = args.server
    server_port = int(args.port)  # gradio expects an integer port
    if args.save_samples_path:
        if not os.path.exists(args.save_samples_path):
            os.makedirs(args.save_samples_path)
        samples_file = open(args.save_samples_path + '/samples.txt', 'a', encoding='utf8')
        samples_file.write("Chat transcript {}:\n".format(datetime.now()))
    # Dialogue history; each utterance is stored as a list of token ids
    history = []
    print('Starting chat with the bot; press CTRL + Z to quit')

    def chat(gradio_history):
        user_input = gradio_history[-1][0]
        if args.save_samples_path:
            samples_file.write("user:{}\n".format(user_input))
        text_ids = tokenizer.encode(user_input, add_special_tokens=False)
        history.append(text_ids)
        input_ids = [tokenizer.cls_token_id]  # every input starts with [CLS]

        for history_id, history_utr in enumerate(history[-args.max_history_len:]):
            input_ids.extend(history_utr)
            input_ids.append(tokenizer.sep_token_id)
        input_ids = torch.tensor(input_ids).long().to(device)
        input_ids = input_ids.unsqueeze(0)
        response = []  # the response generated from the context
        # Generate at most max_len tokens
        for _ in range(args.max_len):
            outputs = model(input_ids=input_ids)
            logits = outputs.logits
            next_token_logits = logits[0, -1, :]
            # Apply a repetition penalty to every token already generated,
            # lowering its probability of being generated again
            for id in set(response):
                next_token_logits[id] /= args.repetition_penalty
            next_token_logits = next_token_logits / args.temperature
            # Set the logit of [UNK] to -inf so the model can never predict it
            next_token_logits[tokenizer.convert_tokens_to_ids('[UNK]')] = -float('Inf')
            filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=args.topk, top_p=args.topp)
            # torch.multinomial draws num_samples elements without replacement,
            # weighted by probability, and returns their indices
            next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
            if next_token == tokenizer.sep_token_id:  # [SEP] marks the end of the response
                break
            response.append(next_token.item())
            input_ids = torch.cat((input_ids, next_token.unsqueeze(0)), dim=1)
        history.append(response)
        text = tokenizer.convert_ids_to_tokens(response)
        chatbot_output = "".join(text)
        print("chatbot:" + chatbot_output)
        gradio_history[-1][1] = chatbot_output
        if args.save_samples_path:
            samples_file.write("chatbot:{}\n".format(chatbot_output))
        return gradio_history

    def user(user_message, history):
        # Clear the textbox and append the user's message with an empty
        # reply slot for chat() to fill; two values for the two outputs below
        return "", history + [[user_message, None]]

    try:
        with gr.Blocks() as demo:
            chatbot = gr.Chatbot([], elem_id="chatbot").style(height=500)
            msg = gr.Textbox(label="User")
            clear = gr.Button("Clear")
            msg.submit(user, [msg, chatbot],
                       [msg, chatbot],
                       queue=False).then(
                chat, chatbot, chatbot
            )
            clear.click(lambda: None, None, chatbot, queue=False)
        demo.queue(concurrency_count=args.concurrency_count).launch(share=True,
                                                                    server_port=server_port,
                                                                    server_name=server_name)

    except KeyboardInterrupt:
        if args.save_samples_path:
            samples_file.close()
        gr.close_all()


if __name__ == '__main__':
    main()
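To start the service with the project's defaults, an invocation along the lines of the commented example above should work (the flags are the ones defined in set_args()):

python interact_gpt2_gardio.py --device 0 --port 7861 --max_history_len 5 --temperature 0.9

Note that the script keeps two parallel histories: history holds the token-id form of each utterance that is fed back to the model, while gradio_history holds the [user, bot] text pairs that gr.Chatbot displays.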

 

1.5 Web Demos

demoweb

import gradio as gr
import random

def demo(name):
    # Simple text-in/text-out example function (not wired up below)
    return name + " is a clever little darling :)"

def gameplay(sample):
    if sample == "1":
        games = ["flick the nose", "pull the ear",
                 "20 squats", "stand on one foot for 20s",
                 "run one lap", "run two laps", "pass"]
        res = random.choices(games, k=1)
    elif sample == "2":
        games = ["qinqin", "baobao",
                 "louzhe", "pazhang",
                 "run one lap", "qinlian", "pass"]
        res = random.choices(games, k=1)
    else:
        # Guard against other inputs; res would otherwise be undefined
        return "please enter 1 or 2!"
    return "please " + ",".join(res) + "!"

obj = gr.Interface(fn=gameplay, inputs="text", outputs="text")
obj.launch(share=True, server_port=7861, server_name="10.188.72.25")
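
Here share=True asks Gradio to also create a temporary public *.gradio.live link, while server_name/server_port bind the app to the intranet address (10.188.72.25:7861 in this case), so colleagues on the LAN can open it directly.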

  

 

interact_gradio.py

This is essentially the same script as interact_gpt2_gardio.py listed in section 1.4, so the full source is not repeated here. It differs only in its defaults and two details worth noting:

1) --model_path defaults to /home/arm/disk_arm_8T/xiaoliu/AI610-SDK-r1p0-00eac0/GPT2_chinese_chat/GPT2-chitchat/model_chat1to6_bs8_lay20/min_ppl_model

2) --max_history_len defaults to 3 instead of 5, and --port defaults to 7860 instead of 7861

3) it calls GPT2LMHeadModel.from_pretrained() without ignore_mismatched_sizes=True

4) it passes args.port to launch() without converting it to int; apply the int() cast shown in the version above, since gradio expects an integer server_port

  

chatweb

import gradio as gr
import random
import time

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="User")
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the user's message with an empty reply slot
        print(history + [[user_message, None]])
        return "", history + [[user_message, None]]

    def bot(history):
        # Fill the empty reply slot of the newest message with a canned answer
        bot_message = random.choice(["run", "hit", "throw"])
        history[-1][1] = bot_message
        time.sleep(1)
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(share=True, server_port=7861, server_name="10.188.72.25")
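
This is the same event pattern used in interact_gpt2_gardio.py: user() runs un-queued so the message appears immediately, .then() chains bot() to fill in the reply afterwards, and the Clear button resets the Chatbot by returning None.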

  

 
