nvidia-smi 提供了显卡使用相关的一系列信息,但它在最为关键的进程命令信息上却十分简略:当有多块卡时,清一色的 python 或者 caffe 很难分清哪个进程是谁启动的;此外,有的程序占用多张卡运行,重复显示也没有必要。
# coding=utf-8
import os
import sys
import re
import pwd
import time
import psutil
import subprocess
import operator
def clear():
    """Erase the terminal and move the cursor to the top-left corner."""
    # ANSI escape codes: ESC[2J clears the display, ESC[H homes the cursor.
    erase_and_home = "\x1b[2J\x1b[H"
    sys.stdout.write(erase_and_home)
def get_owner(pid):
    """Return the login name of the user owning process *pid*, or None.

    Reads the ``Uid:`` line of ``/proc/<pid>/status`` and maps the first
    value on that line to a user name through the password database.

    Args:
        pid: Integer process id.

    Returns:
        The user name, or None when the status file cannot be read, holds
        no parsable ``Uid:`` line, or the uid has no password entry.
    """
    try:
        # The context manager closes the file even on the early return.
        with open('/proc/%d/status' % pid) as status_file:
            for line in status_file:
                if line.startswith('Uid:'):
                    uid = int(line.split()[1])
                    return pwd.getpwuid(uid).pw_name
    except (OSError, KeyError, ValueError, IndexError, TypeError):
        # Best-effort lookup: a vanished process, an unreadable or malformed
        # status file, an unknown uid or a non-integer pid all yield None.
        # KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
        return None
    return None
def get_cmd(pid):
    """Describe process *pid* as ``"<working dir> <arg0> <arg1> ..."``.

    Args:
        pid: Process id; anything ``int()`` accepts (e.g. a digit string).

    Returns:
        The process working directory followed by every command-line
        argument, all separated by single spaces.
    """
    proc = psutil.Process(int(pid))
    # The working directory is queried first, then the argument vector.
    pieces = [proc.cwd()]
    pieces.extend(proc.cmdline())
    return " ".join(pieces)
def is_train(name):
    """Tell whether *name* mentions one of the known training programs.

    Args:
        name: Command string to inspect.

    Returns:
        True if *name* contains "python", "caffe" or "python3" as a
        (case-sensitive) substring, False otherwise.
    """
    markers = ("python", "caffe", "python3")
    return any(name.find(marker) != -1 for marker in markers)
# Memory / utilisation row of the per-GPU table, e.g.
# "| 23%   34C    P8    10W / 250W |    123MiB / 11178MiB |      5%      Default |"
# Groups: used memory (MiB), total memory (MiB), utilisation ("N%").
_GPU_STATUS_RE = re.compile(
    r'.*\d+C.*\d+W / +\d+W.* +(\d+)MiB / +(\d+)MiB.* +(\d+%).*')

# Row of the process table: "|  <gpu>  <pid>  <type>  <name>  <N>MiB |".
# Groups: gpu id, pid, process name, used memory (MiB).
_GPU_PROCESS_RE = re.compile(
    r'\| +(\d+) +(\d+) +\w+ +([^ ]*) +(\d+)MiB \|')


def _read_nvidia_smi():
    """Run ``nvidia-smi`` and return its decoded standard output."""
    proc = subprocess.Popen('nvidia-smi', stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and reaps the child process.
    out, _ = proc.communicate()
    return out.decode()


def _parse_gpu_status(rows):
    """Return one "usedM/totalM<TAB>util%" string per GPU, in table order."""
    gpus = []
    # Assumes the nvidia-smi layout in which GPU 0's status row is row 8 and
    # each further GPU follows 3 rows later -- other driver versions may use
    # a different layout; TODO confirm. Slicing keeps short output safe.
    for row in rows[8::3]:
        found = _GPU_STATUS_RE.search(row)
        if found is None:
            break
        mem_usage, mem_total, usage = found.groups()
        gpus.append(mem_usage + "M/" + mem_total + "M\t" + usage)
    return gpus


def _parse_process_table(rows):
    """Return ``{pid: "gpu[,gpu...]<TAB>pid<TAB>memM"}`` from the process table."""
    procs = {}
    # Walk upwards from the row above the last one (the last row is expected
    # to be the closing border) until the first row that is not a process.
    for row in reversed(rows[:-1]):
        found = _GPU_PROCESS_RE.search(row)
        if found is None:
            break
        gpuid, pid, _, mem_usage = found.groups()
        if pid in procs:
            # Same process on another GPU: prepend the id so that ids stay
            # in table order, and keep a single entry per pid.
            procs[pid] = gpuid + "," + procs[pid]
        else:
            procs[pid] = gpuid + "\t" + pid + "\t" + mem_usage + "M"
    return procs


def get_info(smi_output=None):
    """Print a condensed GPU report built from ``nvidia-smi`` output.

    The report consists of a timestamp, a per-GPU memory/utilisation
    summary (four GPUs per line), a column header, and one line per
    training process (as judged by ``is_train``) showing the GPUs it uses,
    its pid, its memory usage, its working directory and its command line.
    A process that spans several GPUs is listed once.

    Args:
        smi_output: Optional text of an ``nvidia-smi`` run to parse. When
            None (the default) ``nvidia-smi`` is executed.

    Returns:
        None; the report is written to standard output.
    """
    if smi_output is None:
        smi_output = _read_nvidia_smi()
    rows = smi_output.strip().split('\n')

    gpus = _parse_gpu_status(rows)
    procs = _parse_process_table(rows)

    lines = []
    # Ascending order of the "gpu<TAB>pid<TAB>mem" summary string.
    for pid, summary in sorted(procs.items(), key=operator.itemgetter(1)):
        try:
            cmd = get_cmd(pid)
        except psutil.NoSuchProcess:
            # The process exited after nvidia-smi listed it; skip it rather
            # than abort the whole report.
            continue
        if is_train(cmd):
            lines.append(summary + "\t" + cmd)

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    cells = []
    for i, g in enumerate(gpus):
        cells.append(str(i) + ":" + g + " \t")
        if i % 4 == 3:
            # Wrap the summary after every fourth GPU.
            cells.append("\n")
    # Drop the trailing separator (tab or newline).
    print("".join(cells)[:-1])

    print("gpu\tpid\tmemusage\tdir\tcmd")
    for line in lines:
        print(line)
def main():
    """Refresh the report forever: clear the screen, then print a snapshot.

    Never returns.
    """
    # NOTE: there is no pause between refreshes; the loop runs back to back.
    while True:
        clear()
        get_info()
if __name__ == "__main__":
    # One-shot mode: print a single snapshot and exit. Calling main()
    # instead would give a continuously refreshing view.
    get_info()