使用之前只需要简单地设置两个变量即可:
- Memory_Allocated_GB = 5  # 需要占用的显存大小,单位 GiB
- GPU_ID = 2  # 需要占用显存的 GPU ID
import torch
from torch import cuda
import time
import psutil
def get_gpu_used_information():
    """Collect per-process GPU memory usage for every visible GPU.

    Returns:
        list[tuple]: one ``(gpu_id, pid, memory_mib, username)`` tuple per
        GPU process, where ``pid`` and ``memory_mib`` are strings parsed
        from the ``torch.cuda.list_gpu_processes`` report.
    """
    used_information = []
    for gpu_id in range(cuda.device_count()):
        # list_gpu_processes() returns a human-readable multi-line report;
        # lines describing a process start with the word "process".
        for line in cuda.list_gpu_processes(gpu_id).split('\n'):
            if not line.startswith('process'):
                continue
            inf = line.split()
            try:
                # On Windows username() is "DOMAIN\\user"; on POSIX it is a
                # plain name with no backslash.  [-1] handles both, whereas
                # the original [1] raised IndexError on POSIX.
                user = psutil.Process(int(inf[1])).username().split('\\')[-1]
            except psutil.NoSuchProcess:
                # The process may have exited between listing and lookup.
                continue
            # inf[3] looks like "456.000" (MiB); [:-4] drops the ".000"
            # fractional part — TODO confirm against list_gpu_processes format.
            used_information.append((gpu_id, inf[1], inf[3][:-4], user))
    return used_information
def print_information(infs):
print('\033[0:32m+----------------------------------------------------------+\033[0m')
print('\033[0:32m| GPU PID MEMORY-USED USER-NAME |\033[0m')
for inf in infs:
record = "{:>8d} {:>9d} {:>14d}MiB {}".format(inf[0], int(inf[1]), int(inf[2]), str(inf[3]).rjust(15))
print('\033[0:32m|' + record + ' ' * 6 + '|\033[0m')
print('\033[0:32m+----------------------------------------------------------+\033[0m')
if __name__ == '__main__':
    # How much GPU memory to reserve (GiB) and which device to reserve it on.
    Memory_Allocated_GB = 5  # GiB
    GPU_ID = 2
    ace = None  # holds the reserving tensor; None while we don't own the memory
    while True:
        print('\033[0:33mNow: ' + time.strftime('%Y-%m-%d %H:%M:%S') + '\033[0m')
        used_inf = get_gpu_used_information()
        print_information(used_inf)
        try:
            device = 'cuda:{}'.format(GPU_ID)
            # A float32 tensor of shape [GB, 1024, 1024, 256] occupies exactly
            # GB * 1024^3 bytes (4 bytes/element * 256 * 1024 * 1024 per GiB).
            ace = torch.zeros([Memory_Allocated_GB, 1024, 1024, 256], device=device)
        except RuntimeError:
            # Allocation failed (not enough free memory on the device): drop
            # any stale reference, then retry after a visible 10 s countdown.
            ace = None
            for sec in range(9, -1, -1):
                # Original message said "tra again" — typo fixed to "try again".
                print("\r\033[0:31mRace to control GPU: {} {}GiB failed, try again after {}s...\033[0m".format(
                    GPU_ID, Memory_Allocated_GB, sec), flush=True, end='')
                time.sleep(1)
            print()
        else:
            # Success: keep the tensor alive and report the hold-up time forever.
            print("\033[1:35mGPU: {}, memory allocated: {} GB\033[0m".format(GPU_ID, Memory_Allocated_GB))
            while True:
                print("\r\033[1:35mGPU: {}, hold-up time: {}\033[0m".format(GPU_ID, time.strftime('%Y-%m-%d %H:%M:%S')), end='',
                      flush=True)
                time.sleep(1)