通过ESMC模型对蛋白质进行Embedding

## 配置区

# Proteins to embed, one entry per protein: [(id, seq), ...]
data = []
# Location the per-protein "<id>.npy" files are written to.
save_path = ""
#----------------



#----------------

from esm.models.esmc import ESMC
from esm.sdk.api import ESMProtein, LogitsConfig

import numpy as np
import os

# Name of the pretrained ESMC checkpoint to load.
model_name = "esmc_600m"
client = ESMC.from_pretrained(model_name).to("cuda")  # or "cpu"
print("Finish load model")

# An empty save_path means "the current working directory"; the original
# f"{save_path}/..." would have pointed at the filesystem root in that case.
out_dir = save_path or "."
# np.save does not create missing directories, so do it up front.
os.makedirs(out_dir, exist_ok=True)

total = len(data)
for counter, item in enumerate(data, start=1):
    # Each entry is indexed as (id, sequence); extra fields are ignored.
    protein_id, sequence = item[0], item[1]
    out_file = os.path.join(out_dir, f"{protein_id}.npy")

    # An existing file counts as finished work, which lets an interrupted
    # run be resumed without recomputing embeddings.
    if not os.path.exists(out_file):
        protein = ESMProtein(sequence=sequence)
        protein_tensor = client.encode(protein)
        logits_output = client.logits(
            protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
        )
        # Drop the leading (batch) axis and move the result to host memory.
        embedding = logits_output.embeddings[0].cpu()
        print(embedding.shape)

        # Write to a temporary file and rename it once complete: a crash in
        # the middle of np.save must not leave a truncated "<id>.npy" that
        # the existence check above would skip forever.
        tmp_file = out_file + ".tmp"
        with open(tmp_file, "wb") as handle:
            np.save(handle, embedding)
        os.replace(tmp_file, out_file)

    # Progress: processed count, total count, integer percentage.
    print(counter, total, int(counter / total * 100), "%")
posted @ 2025-08-25 00:02  ylifs  阅读(47)  评论(0)    收藏  举报