Hugging Face 模型使用:Python、智谱清言、图生文、文本转语音

完整代码

from transformers import pipeline
from zhipuai import ZhipuAI
# Module-level ZhipuAI client used by generate_story below. The api_key value
# here is a placeholder string and must be replaced with a real key.
client = ZhipuAI(api_key='智普清言的API_KEY')

import json
import requests
def text2speech(message):
  """Synthesize speech for ``message`` and save it as ``audio.flac``.

  Posts the text to the Hugging Face Inference API endpoint for the
  ``espnet/kan-bayashi_ljspeech_vits`` model and writes the raw response
  bytes to ``audio.flac`` in the current working directory.

  Args:
    message: Text to convert to speech.

  Raises:
    requests.HTTPError: If the API answers with a 4xx/5xx status.
  """
  headers = {"Authorization": "Bearer huggingface的token"}
  API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
  payloads = {
      "inputs": message
  }

  # A timeout keeps the call from hanging forever on a stalled connection.
  response = requests.post(API_URL, headers=headers, json=payloads, timeout=120)
  # Fail loudly on an error status; otherwise the error body would be saved
  # to disk as if it were audio data.
  response.raise_for_status()
  with open("audio.flac", "wb") as file:
    file.write(response.content)

def generate_story(sc):
  """Expand the sentence ``sc`` into a short story using the GLM-4 chat model.

  Sends a single user message through the module-level ``client``. The
  prompt (written in Chinese) asks for a humorous Chinese story of at most
  100 characters based on ``sc``.

  Args:
    sc: The sentence interpolated into the prompt as context.

  Returns:
    The message content of the first choice in the completion; it is also
    printed to stdout.
  """
  prompt = f'你是一位说书的老人,下面的context中的内容是一个外国人说的一句英文,请你根据这句话延展出一个中文的故事。最好还有点小幽默,字数控制在100字以内。context: {sc}'
  completion = client.chat.completions.create(
      model='glm-4',
      messages=[{'role': 'user', 'content': prompt}],
  )

  story = completion.choices[0].message.content
  print(story)
  return story

def img2text(url):
  """Generate a caption for an image with the BLIP base captioning model.

  Builds an ``image-to-text`` pipeline for
  ``Salesforce/blip-image-captioning-base`` on every call and runs it on
  ``url``.

  Args:
    url: Image reference passed straight to the pipeline.

  Returns:
    The ``generated_text`` value of the first pipeline result; it is also
    printed to stdout.
  """
  captioner = pipeline('image-to-text', model='Salesforce/blip-image-captioning-base')
  results = captioner(url)
  caption = results[0]['generated_text']

  print(caption)
  return caption


# End-to-end run: caption the image, expand the caption into a story, then
# synthesize the story to audio.flac. Each step consumes the previous result.
scenario = img2text('bee.jpg')
story = generate_story(scenario)
text2speech(story)

第一步:

图片转文字
pip install transformers
在 Hugging Face 上找模型
from transformers import pipeline
def img2text(url):
  """Return a caption for the image referenced by ``url``.

  Creates an ``image-to-text`` pipeline backed by
  ``Salesforce/blip-image-captioning-base`` and applies it to ``url``.

  Args:
    url: Image reference handed to the pipeline unchanged.

  Returns:
    The ``generated_text`` entry of the first result, which is printed
    before being returned.
  """
  caption_pipeline = pipeline('image-to-text', model='Salesforce/blip-image-captioning-base')
  first_result = caption_pipeline(url)[0]
  generated = first_result['generated_text']

  print(generated)
  return generated
上传一张图片
# Example call: caption the image 'bee.jpg' and keep the resulting text.
img_text = img2text('bee.jpg')
输出结果:

第二步:

文字扩展
pip install zhipuai
代码
from zhipuai import ZhipuAI
def generate_story(sc):
  """Expand the sentence ``sc`` into a short story using the GLM-4 chat model.

  The ZhipuAI API key is read from the ``ZHIPUAI_API_KEY`` environment
  variable so that no credential is stored in the source. The prompt
  (written in Chinese) asks for a humorous Chinese story of at most 100
  characters based on ``sc``.

  Args:
    sc: The sentence interpolated into the prompt as context.

  Returns:
    The message content of the first choice in the completion; it is also
    printed to stdout.

  Raises:
    RuntimeError: If ``ZHIPUAI_API_KEY`` is unset or empty.
  """
  import os  # local import: the snippet's import list does not include os

  api_key = os.environ.get('ZHIPUAI_API_KEY')
  if not api_key:
    raise RuntimeError(
        'Set the ZHIPUAI_API_KEY environment variable to your ZhipuAI API key'
    )

  client = ZhipuAI(api_key=api_key)
  response = client.chat.completions.create(
      model = 'glm-4',
      messages = [{
          'role': 'user',
          'content': f'你是一位说书的老人,下面的context中的内容是一个外国人说的一句英文,请你根据这句话延展出一个中文的故事。最好还有点小幽默,字数控制在100字以内。context: {sc}'
      }]
  )

  story = response.choices[0].message.content
  print(story)
  return story
输出结果:

第三步:

文字转语音
在 Hugging Face 上找模型
代码
import json
import requests
def text2speech(message):
  """Synthesize speech for ``message`` and save it as ``audio.flac``.

  Posts the text to the Hugging Face Inference API endpoint for the
  ``espnet/kan-bayashi_ljspeech_vits`` model and writes the raw response
  bytes to ``audio.flac`` in the current working directory. The access
  token is read from the ``HF_TOKEN`` environment variable so that no
  credential is stored in the source.

  Args:
    message: Text to convert to speech.

  Raises:
    RuntimeError: If ``HF_TOKEN`` is unset or empty.
    requests.HTTPError: If the API answers with a 4xx/5xx status.
  """
  import os  # local import: the snippet's import list does not include os

  token = os.environ.get("HF_TOKEN")
  if not token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to your Hugging Face access token"
    )

  headers = {"Authorization": f"Bearer {token}"}
  API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
  payloads = {
      "inputs": message
  }

  # A timeout keeps the call from hanging forever on a stalled connection.
  response = requests.post(API_URL, headers=headers, json=payloads, timeout=120)
  # Fail loudly on an error status; otherwise the error body would be saved
  # to disk as if it were audio data.
  response.raise_for_status()
  with open("audio.flac", "wb") as file:
    file.write(response.content)
 输出结果:
posted @ 2024-04-03 15:37  腿哥123  阅读(62)  评论(0)  编辑  收藏  举报