## Configuration
# Records to embed, as a list of (id, seq) tuples: the id becomes the output
# file name and seq is the sequence string handed to the model.
data = []
# Directory under which one "<id>.npy" file is written per record.
save_path = ""
#----------------
#----------------
from esm.models.esmc import ESMC
from esm.sdk.api import ESMProtein, LogitsConfig
import numpy as np
import os
# Name of the pretrained checkpoint passed to ESMC.from_pretrained.
model_name = "esmc_600m"
client = ESMC.from_pretrained(model_name).to("cuda")  # or "cpu"
print("Finish load model")

# Create the output directory up front so the first save cannot fail with
# FileNotFoundError. An empty save_path means "current working directory",
# which needs no creation.
if save_path:
    os.makedirs(save_path, exist_ok=True)

# Embed every (id, seq) record in `data` and store one .npy file per id.
# Records whose output file already exists are skipped, so an interrupted run
# can be restarted without recomputing finished sequences.
counter = 0  # stays 0 (and defined) when `data` is empty
for counter, item in enumerate(data, start=1):
    # os.path.join keeps the path relative when save_path is empty, whereas
    # f"{save_path}/..." would point at the filesystem root.
    out_path = os.path.join(save_path, f"{item[0]}.npy")
    if not os.path.exists(out_path):
        protein = ESMProtein(sequence=item[1])
        protein_tensor = client.encode(protein)
        logits_output = client.logits(
            protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
        )
        # First entry of the returned embeddings, moved to host memory.
        # NOTE(review): presumably index 0 is the batch dimension -- confirm.
        embedding = logits_output.embeddings[0].cpu()
        print(embedding.shape)
        # Write to a temporary file and rename it into place: the skip check
        # above trusts file existence, so a half-written file left behind by
        # an interrupted run must never appear under the final name.
        # np.save is given an open file object so it does not append ".npy"
        # to the temporary name.
        tmp_path = out_path + ".tmp"
        with open(tmp_path, "wb") as handle:
            np.save(handle, embedding)
        os.replace(tmp_path, out_path)
    # Progress: records handled so far (including skipped ones), total, percent.
    print(counter, len(data), int(counter / len(data) * 100), "%")