1.watch_nvidia
def watch_nvidia(nvidia_ids, min_memory):
    """Count the cards in ``nvidia_ids`` whose free memory exceeds ``min_memory``.

    Prints the free memory of every queried card.

    Args:
        nvidia_ids: Device indices, each passed to
            ``pynvml.nvmlDeviceGetHandleByIndex``.
        min_memory: Threshold compared against ``meminfo.free / 1024 ** 3``
            (the value printed as "GB"). A card counts as free only when its
            value is strictly greater than the threshold.

    Returns:
        The number of free cards, or -1 (after printing "no free card!")
        when no card is free, including when ``nvidia_ids`` is empty.
    """
    free_num = 0
    for i in nvidia_ids:
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        free_gb = meminfo.free * 1.0 / (1024 ** 3)
        print("card {} free memory is {}GB".format(i, free_gb))
        # Count directly rather than storing a per-card flag at index
        # ``i - 1``: that index was derived from the device id, so ids that
        # are not exactly 0..n-1 (e.g. [2, 3]) raised IndexError.
        if free_gb > min_memory:
            free_num += 1
    if free_num == 0:
        print("no free card!")
        return -1
    return free_num
2.send_msg
def send_msg(target_email, msg, *, sender='xxxxx@163.com', password="xxxxxx"):
    """Email ``msg`` as a UTF-8 plain-text message to ``target_email``.

    The message is sent through ``smtp.163.com`` over SSL on port 465 with
    the subject 'nvidia显卡监控'.

    Args:
        target_email: Address of the single recipient.
        msg: Body text of the email.
        sender: Account used both as the envelope sender and for login.
        password: Credential passed to ``login`` for ``sender``.

    Raises:
        smtplib.SMTPException: Propagated from login or sendmail.
        OSError: Propagated when the connection cannot be established.
    """
    receivers = [target_email]
    message = MIMEText(msg, 'plain', 'utf-8')
    subject = 'nvidia显卡监控'
    message['Subject'] = Header(subject, 'utf-8')
    # RFC 5322 requires a From header; the original message had none.
    message['From'] = sender
    message['To'] = target_email
    # Giving host and port to the constructor already opens the connection,
    # so the extra connect() call (a second connection) is dropped. The
    # with-block sends QUIT and closes the socket even if login or sendmail
    # raises.
    with smtplib.SMTP_SSL('smtp.163.com', 465) as server:
        server.login(sender, password)
        server.sendmail(sender, receivers, message.as_string())
3.完整脚本nvidia.py
import pynvml
pynvml. nvmlInit( )
import time
import os
import smtplib
from email. mime. text import MIMEText
from email. header import Header
def send_msg(target_email, msg, *, sender='xxxxx@163.com', password="xxxxxxx"):
    """Email ``msg`` as a UTF-8 plain-text message to ``target_email``.

    The message is sent through ``smtp.163.com`` over SSL on port 465 with
    the subject 'nvidia显卡监控'.

    Args:
        target_email: Address of the single recipient.
        msg: Body text of the email.
        sender: Account used both as the envelope sender and for login.
        password: Credential passed to ``login`` for ``sender``.

    Raises:
        smtplib.SMTPException: Propagated from login or sendmail.
        OSError: Propagated when the connection cannot be established.
    """
    receivers = [target_email]
    message = MIMEText(msg, 'plain', 'utf-8')
    subject = 'nvidia显卡监控'
    message['Subject'] = Header(subject, 'utf-8')
    # RFC 5322 requires a From header; the original message had none.
    message['From'] = sender
    message['To'] = target_email
    # Giving host and port to the constructor already opens the connection,
    # so the extra connect() call (a second connection) is dropped. The
    # with-block sends QUIT and closes the socket even if login or sendmail
    # raises.
    with smtplib.SMTP_SSL('smtp.163.com', 465) as server:
        server.login(sender, password)
        server.sendmail(sender, receivers, message.as_string())
def watch_nvidia(nvidia_ids, min_memory):
    """Count the cards in ``nvidia_ids`` whose free memory exceeds ``min_memory``.

    Prints the free memory of every queried card.

    Args:
        nvidia_ids: Device indices, each passed to
            ``pynvml.nvmlDeviceGetHandleByIndex``.
        min_memory: Threshold compared against ``meminfo.free / 1024 ** 3``
            (the value printed as "GB"). A card counts as free only when its
            value is strictly greater than the threshold.

    Returns:
        The number of free cards, or -1 (after printing "no free card!")
        when no card is free, including when ``nvidia_ids`` is empty.
    """
    free_num = 0
    for i in nvidia_ids:
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        free_gb = meminfo.free * 1.0 / (1024 ** 3)
        print("card {} free memory is {}GB".format(i, free_gb))
        # Count directly rather than storing a per-card flag at index
        # ``i - 1``: that index was derived from the device id, so ids that
        # are not exactly 0..n-1 (e.g. [2, 3]) raised IndexError.
        if free_gb > min_memory:
            free_num += 1
    if free_num == 0:
        print("no free card!")
        return -1
    return free_num
# Cards to watch and the free-memory threshold handed to watch_nvidia.
nvidia_ids = [0, 1]
min_memory = 8

# Poll every 10 seconds; once at least one card is reported free, send a
# single notification email and stop.
while True:
    flag = watch_nvidia(nvidia_ids, min_memory)
    if flag < 1:
        # Nothing free yet (watch_nvidia returned -1): wait and poll again.
        time.sleep(10)
        continue
    send_msg("xxxxxxx@bjtu.edu.cn", "{}张显卡空闲".format(flag))
    break
4.后台运行
nohup python nvidia.py > nvidia.out &