查看显卡运行程序的详细信息

nvidia-smi 提供了显卡使用情况的一系列信息,但其中最为关键的进程命令信息却十分简略:当有多块卡时,清一色的 python 或者 caffe 很难分清哪个进程是谁启动的;此外,有的程序占用多张卡运行,重复显示同一个进程也没有必要。

# coding=utf-8
import os
import sys
import re
import pwd
import time
import psutil
import subprocess
import operator


def clear():
    """Clear the terminal and move the cursor to the top-left corner.

    Writes the ANSI "erase display" and "cursor home" escape sequences to
    standard output in a single call. Nothing is returned.
    """
    erase_display = "\x1b[2J"
    cursor_home = "\x1b[H"
    sys.stdout.write(erase_display + cursor_home)


def get_owner(pid):
    """Return the login name of the user that owns process *pid*.

    The first value on the ``Uid:`` line of ``/proc/<pid>/status`` is
    resolved to a user name through the password database.

    Args:
        pid: Process id, given as an int or a numeric string.

    Returns:
        The owner's user name, or ``None`` when the pid is not a valid
        number, the status file cannot be read or parsed, or the UID has
        no password-database entry.
    """
    try:
        # The context manager closes the file even on the early return.
        with open('/proc/%d/status' % int(pid)) as status_file:
            for line in status_file:
                if line.startswith('Uid:'):
                    uid = int(line.split()[1])
                    return pwd.getpwuid(uid).pw_name
    except (OSError, KeyError, ValueError, TypeError, IndexError):
        # Best effort lookup: only the expected failures are swallowed, so
        # KeyboardInterrupt / SystemExit still propagate.
        return None
    # The status file had no "Uid:" line.
    return None


def get_cmd(pid):
    """Return the working directory and command line of process *pid*.

    Args:
        pid: Process id, given as an int or a numeric string.

    Returns:
        ``"<cwd> <arg0> <arg1> ..."`` joined with single spaces. When the
        process cannot be opened at all (for example it has already
        exited) an empty string is returned. When only the working
        directory is unavailable, ``"?"`` stands in for it. When only the
        command line is unavailable, just the directory is returned.
    """
    try:
        process = psutil.Process(int(pid))
    except psutil.Error:
        # The process may vanish between the nvidia-smi call and this
        # lookup; do not let that abort the whole report.
        return ""

    try:
        cwd = process.cwd()
    except psutil.Error:
        # Reading another user's working directory may be denied.
        cwd = "?"

    try:
        args = process.cmdline()
    except psutil.Error:
        args = []

    return " ".join([cwd] + args)


def is_train(name):
    """Tell whether a command string looks like a training job.

    Returns ``True`` when *name* contains any of the known launcher
    substrings, ``False`` otherwise.
    """
    keywords = ("python", "caffe", "python3")
    return any(keyword in name for keyword in keywords)


def _parse_gpu_summaries(rows):
    """Build one ``"<used>M/<total>M\\t<util>%"`` string per GPU row."""
    # Raw string: "\d" is not a valid escape in a normal string literal.
    pattern = re.compile(
        r'.*\d+C.*\d+W / +\d+W.* +(\d+)MiB / +(\d+)MiB.* +(\d+%).*')
    summaries = []
    # The first GPU status row is expected at index 8, with one such row
    # every 3 lines; scanning stops at the first row that does not match.
    lino = 8
    while lino < len(rows):  # bounded, so short output cannot IndexError
        status = pattern.findall(rows[lino])
        if not status:
            break
        mem_usage, mem_total, usage = status[0]
        summaries.append(mem_usage + "M/" + mem_total + "M\t" + usage)
        lino += 3
    return summaries


def _parse_gpu_processes(rows):
    """Map each pid to ``"<gpu ids>\\t<pid>\\t<mem>M"`` from the process table."""
    pattern = re.compile(r'\| +(\d+) +(\d+) +\w+ +([^ ]*) +(\d+)MiB \|')
    table = {}
    # Walk upwards from the second-to-last row (the last one is skipped)
    # and stop at the first row that is not a process entry.
    for row in reversed(rows[:-1]):
        status = pattern.findall(row)
        if not status:
            break
        gpuid, pid, _, mem_usage = status[0]
        if pid in table:
            # Same process on another GPU: prepend the GPU id instead of
            # listing the process twice.
            table[pid] = gpuid + "," + table[pid]
        else:
            table[pid] = gpuid + "\t" + pid + "\t" + mem_usage + "M"
    return table


def get_info():
    """Print a GPU summary and a table of training processes.

    Runs ``nvidia-smi``, parses its text output and prints:

    * the current local time,
    * memory usage and utilisation of every GPU, four GPUs per line,
    * one row per process whose command passes ``is_train``, showing the
      GPU ids it occupies, its pid, its memory usage, its working
      directory and its command line.

    Returns nothing; all results go to standard output.
    """
    # run() waits for the child and closes the pipe, unlike a bare Popen.
    completed = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
    rows = completed.stdout.decode().strip().split('\n')

    gpus = _parse_gpu_summaries(rows)
    processes = _parse_gpu_processes(rows)

    # Rows are ordered by their summary string (plain string comparison).
    # Every summary embeds its pid, so there are no ties to order.
    lines = []
    for pid, summary in sorted(processes.items(), key=operator.itemgetter(1)):
        cmd = get_cmd(pid)
        if is_train(cmd):
            lines.append(summary + "\t" + cmd)

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    line = ""
    for i, g in enumerate(gpus):
        line += str(i) + ":" + g + "    \t"
        if i % 4 == 3:
            line += "\n"
    # Drop the trailing separator (tab or newline) before printing.
    print(line[:-1])
    print("gpu\tpid\tmemusage\tdir\tcmd")
    for line in lines:
        print(line)


def main(interval=0):
    """Redraw the GPU report in an endless loop.

    Args:
        interval: Seconds to pause between refreshes. The default of 0
            refreshes back to back. Pass a positive value (e.g. 1) to
            avoid spawning ``nvidia-smi`` continuously.

    This function never returns on its own; interrupt the program to stop.
    """
    while True:
        clear()
        get_info()
        if interval > 0:
            time.sleep(interval)


if __name__ == "__main__":
    # Print a single snapshot and exit; call main() here instead for a
    # continuously refreshing display.
    get_info()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值