GPU monitoring (AMD / ATI) #993

This commit is contained in:
nicolargo 2024-04-07 15:32:24 +02:00
parent 71e8fe5bfd
commit 0ca1334791
8 changed files with 319 additions and 180 deletions

View File

@ -139,6 +139,10 @@ proc_critical=90
mem_careful=50
mem_warning=70
mem_critical=90
# Temperature
temperature_careful=60
temperature_warning=70
temperature_critical=80
[mem]
disable=False
@ -312,8 +316,8 @@ disable=False
#hide=ambient
# Sensors core thresholds (in Celsius...)
# Default values are grabbed from the system
#temperature_core_careful=60
#temperature_core_warning=70
#temperature_core_careful=45
#temperature_core_warning=65
#temperature_core_critical=80
# Temperatures threshold in °C for hddtemp
# Default values if not defined: 45/52/60

View File

@ -139,6 +139,10 @@ proc_critical=90
mem_careful=50
mem_warning=70
mem_critical=90
# Temperature
temperature_careful=60
temperature_warning=70
temperature_critical=80
[mem]
disable=False
@ -312,8 +316,8 @@ disable=False
#hide=ambient
# Sensors core thresholds (in Celsius...)
# Default values are grabbed from the system
#temperature_core_careful=60
#temperature_core_warning=70
#temperature_core_careful=45
#temperature_core_warning=65
#temperature_core_critical=80
# Temperatures threshold in °C for hddtemp
# Default values if not defined: 45/52/60
@ -443,7 +447,7 @@ disable=False
[alert]
disable=False
# Maximum number of alerts to display (default is 10)
# Maximum number of events to display (default is 10 events)
;max_events=10
# Minimum duration for an event to be taken into account (default is 6 seconds)
;min_duration=6

View File

@ -3,17 +3,16 @@
GPU
===
.. note::
You need to install the `nvidia-ml-py`_ library on your system.
Or `py3nvml`_ for Glances 3.4.0.2 or lower.
Or `nvidia-ml-py3`_ for Glances 3.1.3 or lower.
For the moment, only the following GPUs are supported:
- NVidia (thanks to the `nvidia-ml-py`_ library)
- AMD (only on Linux operating systems with kernel 5.14 or higher)
The GPU stats are shown as a percentage of value and for the configured
refresh time. It displays:
- total GPU usage
- memory consumption
- temperature (Glances 3.1.4 or higher)
- temperature
.. image:: ../_static/gpu.png
@ -38,6 +37,10 @@ You can change the threshold limits in the configuration file:
mem_careful=50
mem_warning=70
mem_critical=90
# Temperature
temperature_careful=60
temperature_warning=70
temperature_critical=80
Legend:
@ -51,5 +54,3 @@ GPU (PROC/MEM) Status
============== ============
.. _nvidia-ml-py: https://pypi.org/project/nvidia-ml-py/
.. _py3nvml: https://pypi.org/project/py3nvml/
.. _nvidia-ml-py3: https://pypi.org/project/nvidia-ml-py3/

View File

@ -141,7 +141,7 @@ Get plugin stats::
"refresh": 3.0,
"regex": True,
"result": None,
"timer": 0.3346278667449951},
"timer": 0.44180941581726074},
{"count": 0,
"countmax": 20.0,
"countmin": None,
@ -150,7 +150,7 @@ Get plugin stats::
"refresh": 3.0,
"regex": True,
"result": None,
"timer": 0.3344759941101074}]
"timer": 0.4416372776031494}]
Fields descriptions:
@ -178,7 +178,7 @@ Get a specific item when field matches the given value::
"refresh": 3.0,
"regex": True,
"result": None,
"timer": 0.3346278667449951}]}
"timer": 0.44180941581726074}]}
GET cloud
---------
@ -226,18 +226,18 @@ Get plugin stats::
"engine": "docker",
"id": "3abd51c615968482d9ccff5afc629f267f6dda113ed68b75b432615fae3b49fb",
"image": ["portainer/portainer-ce:2.9.3"],
"io": {"cumulative_ior": 110592, "cumulative_iow": 962560},
"io": {"cumulative_ior": 483328, "cumulative_iow": 1462272},
"key": "name",
"memory": {"cache": None,
"limit": 7823568896,
"max_usage": None,
"rss": None,
"usage": 15851520},
"memory_usage": 15851520,
"usage": 13778944},
"memory_usage": 13778944,
"name": "portainer",
"network": {"cumulative_rx": 1803247, "cumulative_tx": 1636},
"network": {"cumulative_rx": 3417747, "cumulative_tx": 2196},
"status": "running",
"uptime": "6 days"}]
"uptime": "1 weeks"}]
Fields descriptions:
@ -273,18 +273,18 @@ Get a specific item when field matches the given value::
"engine": "docker",
"id": "3abd51c615968482d9ccff5afc629f267f6dda113ed68b75b432615fae3b49fb",
"image": ["portainer/portainer-ce:2.9.3"],
"io": {"cumulative_ior": 110592, "cumulative_iow": 962560},
"io": {"cumulative_ior": 483328, "cumulative_iow": 1462272},
"key": "name",
"memory": {"cache": None,
"limit": 7823568896,
"max_usage": None,
"rss": None,
"usage": 15851520},
"memory_usage": 15851520,
"usage": 13778944},
"memory_usage": 13778944,
"name": "portainer",
"network": {"cumulative_rx": 1803247, "cumulative_tx": 1636},
"network": {"cumulative_rx": 3417747, "cumulative_tx": 2196},
"status": "running",
"uptime": "6 days"}]}
"uptime": "1 weeks"}]}
GET core
--------
@ -311,19 +311,19 @@ Get plugin stats::
# curl http://localhost:61208/api/4/cpu
{"cpucore": 4,
"ctx_switches": 1083773007,
"ctx_switches": 1203403566,
"guest": 0.0,
"idle": 71.2,
"interrupts": 497643356,
"iowait": 0.0,
"idle": 65.0,
"interrupts": 548819767,
"iowait": 0.3,
"irq": 0.0,
"nice": 0.0,
"soft_interrupts": 222951171,
"soft_interrupts": 252581658,
"steal": 0.0,
"syscalls": 0,
"system": 3.1,
"total": 28.8,
"user": 25.8}
"system": 6.8,
"total": 34.6,
"user": 27.8}
Fields descriptions:
@ -356,7 +356,7 @@ Fields descriptions:
Get a specific field::
# curl http://localhost:61208/api/4/cpu/total
{"total": 28.8}
{"total": 34.6}
GET diskio
----------
@ -366,16 +366,16 @@ Get plugin stats::
# curl http://localhost:61208/api/4/diskio
[{"disk_name": "sda",
"key": "disk_name",
"read_bytes": 106861217280,
"read_count": 5848339,
"write_bytes": 209200316416,
"write_count": 2631934},
"read_bytes": 117153105920,
"read_count": 6366264,
"write_bytes": 235189641216,
"write_count": 2925624},
{"disk_name": "sda1",
"key": "disk_name",
"read_bytes": 24269824,
"read_count": 659,
"read_bytes": 24433664,
"read_count": 675,
"write_bytes": 0,
"write_count": 44}]
"write_count": 52}]
Fields descriptions:
@ -404,10 +404,10 @@ Get a specific item when field matches the given value::
# curl http://localhost:61208/api/4/diskio/disk_name/sda
{"sda": [{"disk_name": "sda",
"key": "disk_name",
"read_bytes": 106861217280,
"read_count": 5848339,
"write_bytes": 209200316416,
"write_count": 2631934}]}
"read_bytes": 117153105920,
"read_count": 6366264,
"write_bytes": 235189641216,
"write_count": 2925624}]}
GET folders
-----------
@ -434,13 +434,13 @@ Get plugin stats::
# curl http://localhost:61208/api/4/fs
[{"device_name": "/dev/mapper/ubuntu--gnome--vg-root",
"free": 35840458752,
"free": 35404374016,
"fs_type": "ext4",
"key": "mnt_point",
"mnt_point": "/",
"percent": 84.5,
"percent": 84.7,
"size": 243334156288,
"used": 195106242560},
"used": 195542327296},
{"device_name": "zsfpool",
"free": 31195136,
"fs_type": "zfs",
@ -469,13 +469,13 @@ Get a specific item when field matches the given value::
# curl http://localhost:61208/api/4/fs/mnt_point//
{"/": [{"device_name": "/dev/mapper/ubuntu--gnome--vg-root",
"free": 35840458752,
"free": 35404374016,
"fs_type": "ext4",
"key": "mnt_point",
"mnt_point": "/",
"percent": 84.5,
"percent": 84.7,
"size": 243334156288,
"used": 195106242560}]}
"used": 195542327296}]}
GET gpu
-------
@ -508,11 +508,11 @@ GET ip
Get plugin stats::
# curl http://localhost:61208/api/4/ip
{"address": "192.168.1.14",
"gateway": "192.168.1.1",
{"address": "192.168.172.139",
"gateway": "192.168.172.240",
"mask": "255.255.255.0",
"mask_cidr": 24,
"public_address": "92.151.148.66",
"public_address": "92.184.102.172",
"public_info_human": ""}
Fields descriptions:
@ -527,7 +527,7 @@ Fields descriptions:
Get a specific field::
# curl http://localhost:61208/api/4/ip/gateway
{"gateway": "192.168.1.1"}
{"gateway": "192.168.172.240"}
GET irq
-------
@ -548,7 +548,10 @@ GET load
Get plugin stats::
# curl http://localhost:61208/api/4/load
{"cpucore": 4, "min1": 0.1416015625, "min15": 0.970703125, "min5": 0.765625}
{"cpucore": 4,
"min1": 0.73486328125,
"min15": 1.60546875,
"min5": 1.31591796875}
Fields descriptions:
@ -560,7 +563,7 @@ Fields descriptions:
Get a specific field::
# curl http://localhost:61208/api/4/load/min1
{"min1": 0.1416015625}
{"min1": 0.73486328125}
GET mem
-------
@ -568,16 +571,16 @@ GET mem
Get plugin stats::
# curl http://localhost:61208/api/4/mem
{"active": 2535997440,
"available": 2432135168,
"buffers": 501940224,
"cached": 2622812160,
"free": 2432135168,
"inactive": 3695976448,
"percent": 68.9,
"shared": 652316672,
{"active": 2775687168,
"available": 1968508928,
"buffers": 58830848,
"cached": 2570166272,
"free": 1968508928,
"inactive": 3191869440,
"percent": 74.8,
"shared": 877522944,
"total": 7823568896,
"used": 5391433728}
"used": 5855059968}
Fields descriptions:
@ -604,13 +607,13 @@ GET memswap
Get plugin stats::
# curl http://localhost:61208/api/4/memswap
{"free": 6828023808,
"percent": 15.5,
"sin": 8335540224,
"sout": 16940646400,
{"free": 4853673984,
"percent": 39.9,
"sin": 9179410432,
"sout": 19760902144,
"time_since_update": 1,
"total": 8082419712,
"used": 1254395904}
"used": 3228745728}
Fields descriptions:
@ -635,15 +638,15 @@ Get plugin stats::
# curl http://localhost:61208/api/4/network
[{"alias": None,
"bytes_all": 0,
"bytes_all_gauge": 9770615278,
"bytes_all_gauge": 11407861656,
"bytes_recv": 0,
"bytes_recv_gauge": 9183565245,
"bytes_recv_gauge": 10711435586,
"bytes_sent": 0,
"bytes_sent_gauge": 587050033,
"bytes_sent_gauge": 696426070,
"interface_name": "wlp2s0",
"key": "interface_name",
"speed": 0,
"time_since_update": 0.22126293182373047},
"time_since_update": 0.3313474655151367},
{"alias": None,
"bytes_all": 0,
"bytes_all_gauge": 0,
@ -654,7 +657,7 @@ Get plugin stats::
"interface_name": "br-40875d2e2716",
"key": "interface_name",
"speed": 0,
"time_since_update": 0.22126293182373047}]
"time_since_update": 0.3313474655151367}]
Fields descriptions:
@ -690,15 +693,15 @@ Get a specific item when field matches the given value::
# curl http://localhost:61208/api/4/network/interface_name/wlp2s0
{"wlp2s0": [{"alias": None,
"bytes_all": 0,
"bytes_all_gauge": 9770615278,
"bytes_all_gauge": 11407861656,
"bytes_recv": 0,
"bytes_recv_gauge": 9183565245,
"bytes_recv_gauge": 10711435586,
"bytes_sent": 0,
"bytes_sent_gauge": 587050033,
"bytes_sent_gauge": 696426070,
"interface_name": "wlp2s0",
"key": "interface_name",
"speed": 0,
"time_since_update": 0.22126293182373047}]}
"time_since_update": 0.3313474655151367}]}
GET now
-------
@ -706,7 +709,7 @@ GET now
Get plugin stats::
# curl http://localhost:61208/api/4/now
"2024-04-06 17:45:25 CEST"
"2024-04-07 15:30:09 CEST"
GET percpu
----------
@ -717,7 +720,20 @@ Get plugin stats::
[{"cpu_number": 0,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 10.0,
"idle": 65.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 5.0,
"total": 35.0,
"user": 12.0},
{"cpu_number": 1,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 66.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
@ -725,21 +741,8 @@ Get plugin stats::
"softirq": 0.0,
"steal": 0.0,
"system": 4.0,
"total": 90.0,
"user": 52.0},
{"cpu_number": 1,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 53.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 2.0,
"total": 47.0,
"user": 11.0}]
"total": 34.0,
"user": 6.0}]
Fields descriptions:
@ -768,12 +771,12 @@ Get plugin stats::
# curl http://localhost:61208/api/4/ports
[{"description": "DefaultGateway",
"host": "192.168.1.1",
"host": "192.168.172.240",
"indice": "port_0",
"port": 0,
"refresh": 30,
"rtt_warning": None,
"status": 0.007423,
"status": 0.011248,
"timeout": 3}]
Fields descriptions:
@ -790,18 +793,18 @@ Fields descriptions:
Get a specific field::
# curl http://localhost:61208/api/4/ports/host
{"host": ["192.168.1.1"]}
{"host": ["192.168.172.240"]}
Get a specific item when field matches the given value::
# curl http://localhost:61208/api/4/ports/host/192.168.1.1
{"192.168.1.1": [{"description": "DefaultGateway",
"host": "192.168.1.1",
# curl http://localhost:61208/api/4/ports/host/192.168.172.240
{"192.168.172.240": [{"description": "DefaultGateway",
"host": "192.168.172.240",
"indice": "port_0",
"port": 0,
"refresh": 30,
"rtt_warning": None,
"status": 0.007423,
"status": 0.011248,
"timeout": 3}]}
GET processcount
@ -810,7 +813,7 @@ GET processcount
Get plugin stats::
# curl http://localhost:61208/api/4/processcount
{"pid_max": 0, "running": 1, "sleeping": 333, "thread": 1520, "total": 403}
{"pid_max": 0, "running": 1, "sleeping": 341, "thread": 1704, "total": 410}
Fields descriptions:
@ -823,7 +826,7 @@ Fields descriptions:
Get a specific field::
# curl http://localhost:61208/api/4/processcount/total
{"total": 403}
{"total": 410}
GET processlist
---------------
@ -863,17 +866,30 @@ GET quicklook
Get plugin stats::
# curl http://localhost:61208/api/4/quicklook
{"cpu": 28.8,
{"cpu": 34.6,
"cpu_hz": 2025000000.0,
"cpu_hz_current": 1723628500.0,
"cpu_hz_current": 2047700000.0,
"cpu_name": "Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz",
"cpucore": 4,
"load": 24.3,
"mem": 68.9,
"load": 40.1,
"mem": 74.8,
"percpu": [{"cpu_number": 0,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 10.0,
"idle": 65.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 5.0,
"total": 35.0,
"user": 12.0},
{"cpu_number": 1,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 66.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
@ -881,48 +897,35 @@ Get plugin stats::
"softirq": 0.0,
"steal": 0.0,
"system": 4.0,
"total": 90.0,
"user": 52.0},
{"cpu_number": 1,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 53.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 2.0,
"total": 47.0,
"user": 11.0},
"total": 34.0,
"user": 6.0},
{"cpu_number": 2,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 62.0,
"idle": 60.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 1.0,
"total": 38.0,
"user": 2.0},
"system": 8.0,
"total": 40.0,
"user": 11.0},
{"cpu_number": 3,
"guest": 0.0,
"guest_nice": 0.0,
"idle": 62.0,
"idle": 18.0,
"iowait": 0.0,
"irq": 0.0,
"key": "cpu_number",
"nice": 0.0,
"softirq": 0.0,
"steal": 0.0,
"system": 1.0,
"total": 38.0,
"user": 2.0}],
"swap": 15.5}
"system": 4.0,
"total": 82.0,
"user": 57.0}],
"swap": 39.9}
Fields descriptions:
@ -1042,7 +1045,7 @@ GET uptime
Get plugin stats::
# curl http://localhost:61208/api/4/uptime
"33 days, 8:44:36"
"34 days, 6:29:19"
GET version
-----------
@ -1102,34 +1105,34 @@ GET stats history
History of a plugin::
# curl http://localhost:61208/api/4/cpu/history
{"system": [["2024-04-06T17:45:26.720681", 3.1],
["2024-04-06T17:45:27.742109", 2.2],
["2024-04-06T17:45:28.929983", 2.2]],
"user": [["2024-04-06T17:45:26.720667", 25.8],
["2024-04-06T17:45:27.742099", 7.5],
["2024-04-06T17:45:28.929968", 7.5]]}
{"system": [["2024-04-07T15:30:11.275315", 6.8],
["2024-04-07T15:30:12.302948", 3.7],
["2024-04-07T15:30:13.522608", 3.7]],
"user": [["2024-04-07T15:30:11.275301", 27.8],
["2024-04-07T15:30:12.302931", 14.8],
["2024-04-07T15:30:13.522595", 14.8]]}
Limit history to last 2 values::
# curl http://localhost:61208/api/4/cpu/history/2
{"system": [["2024-04-06T17:45:27.742109", 2.2],
["2024-04-06T17:45:28.929983", 2.2]],
"user": [["2024-04-06T17:45:27.742099", 7.5],
["2024-04-06T17:45:28.929968", 7.5]]}
{"system": [["2024-04-07T15:30:12.302948", 3.7],
["2024-04-07T15:30:13.522608", 3.7]],
"user": [["2024-04-07T15:30:12.302931", 14.8],
["2024-04-07T15:30:13.522595", 14.8]]}
History for a specific field::
# curl http://localhost:61208/api/4/cpu/system/history
{"system": [["2024-04-06T17:45:25.061145", 3.1],
["2024-04-06T17:45:26.720681", 3.1],
["2024-04-06T17:45:27.742109", 2.2],
["2024-04-06T17:45:28.929983", 2.2]]}
{"system": [["2024-04-07T15:30:09.390787", 6.8],
["2024-04-07T15:30:11.275315", 6.8],
["2024-04-07T15:30:12.302948", 3.7],
["2024-04-07T15:30:13.522608", 3.7]]}
Limit history for a specific field to last 2 values::
# curl http://localhost:61208/api/4/cpu/system/history/2
{"system": [["2024-04-06T17:45:27.742109", 2.2],
["2024-04-06T17:45:28.929983", 2.2]]}
{"system": [["2024-04-07T15:30:12.302948", 3.7],
["2024-04-07T15:30:13.522608", 3.7]]}
GET limits (used for thresholds)
--------------------------------
@ -1184,6 +1187,9 @@ All limits/thresholds::
"gpu_proc_careful": 50.0,
"gpu_proc_critical": 90.0,
"gpu_proc_warning": 70.0,
"gpu_temperature_careful": 60.0,
"gpu_temperature_critical": 80.0,
"gpu_temperature_warning": 70.0,
"history_size": 1200.0},
"help": {"history_size": 1200.0},
"ip": {"history_size": 1200.0,

View File

@ -27,7 +27,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
.TH "GLANCES" "1" "Apr 06, 2024" "4.0.0_beta01" "Glances"
.TH "GLANCES" "1" "Apr 07, 2024" "4.0.0_beta01" "Glances"
.SH NAME
glances \- An eye on your system
.SH SYNOPSIS

View File

@ -77,6 +77,8 @@ class PluginModel(GlancesPluginModel):
# Init the GPU API
self.nvidia = NvidiaGPU()
self.amd = AmdGPU()
# Just for test purpose (uncomment to test on computer without AMD GPU)
# self.amd = AmdGPU(drm_root_folder='./test-data/plugins/gpu/amd/sys/class/drm')
# We want to display the stat in the curse interface
self.display_curse = True
@ -267,7 +269,7 @@ class PluginModel(GlancesPluginModel):
# New line
ret.append(self.curse_new_line())
# GPU ID + PROC + MEM + TEMPERATURE
id_msg = '{}'.format(gpu_stats['gpu_id'])
id_msg = '{:>7}'.format(gpu_stats['gpu_id'])
try:
proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
except (ValueError, TypeError):
@ -276,7 +278,7 @@ class PluginModel(GlancesPluginModel):
mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
except (ValueError, TypeError):
mem_msg = '{:>4}'.format('N/A')
msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
msg = '{} {} mem {}'.format(id_msg, proc_msg, mem_msg)
ret.append(self.curse_add_line(msg))
return ret

View File

@ -7,15 +7,55 @@
# SPDX-License-Identifier: LGPL-3.0-only
#
"""AMD Extension unit for Glances' GPU plugin."""
"""AMD Extension unit for Glances' GPU plugin.
The class grabs the stats from the /sys/class/drm/ directory.
See: https://wiki.archlinux.org/title/AMDGPU#Manually
"""
# Example
# test-data/plugins/gpu/amd/
# └── sys
# ├── class
# │   └── drm
# │   └── card0
# │   └── device
# │   ├── gpu_busy_percent
# │   ├── hwmon
# │   │   └── hwmon0
# │   │   └── temp1_input
# │   ├── mem_info_vram_total
# │   ├── mem_info_vram_used
# │   ├── pp_dpm_mclk
# │   └── pp_dpm_sclk
# └── kernel
# └── debug
# └── dri
# └── 0
# └── amdgpu_pm_info
import os
import re
from typing import Optional

from glances.logger import logger
DRM_ROOT_FOLDER: str = '/sys/class/drm'
CARD_REGEX: str = r"^card\d$"
DEVICE_FOLDER: str = 'device'
GPU_PROC_PERCENT: str = 'gpu_busy_percent'
GPU_MEM_TOTAL: str = 'mem_info_vram_total'
GPU_MEM_USED: str = 'mem_info_vram_used'
HWMON_REGEXP: str = r"^hwmon\d$"
GPU_TEMPERATURE_REGEXP: str = r"^temp\d_input"
class AmdGPU:
"""GPU card class."""
def __init__(self):
def __init__(self, drm_root_folder: str = DRM_ROOT_FOLDER):
"""Init AMD GPU card class."""
pass
self.drm_root_folder = drm_root_folder
self.device_folders = get_device_list(drm_root_folder)
def exit(self):
"""Close AMD GPU class."""
@ -24,4 +64,86 @@ class AmdGPU:
def get_device_stats(self):
    """Build the list of per-card stats for every detected AMD GPU.

    One dict is produced per entry of ``self.device_folders``, in the same
    order. Each value is delegated to the matching module-level helper,
    which receives the card's device folder.
    """
    return [
        {
            # Name of the field used as the dictionary key
            'key': 'gpu_id',
            # GPU identifier: 'amd' followed by the card position (first is 0)
            'gpu_id': f'amd{position}',
            # GPU name
            'name': get_device_name(folder),
            # Memory consumption in % (not available on all GPU)
            'mem': get_mem(folder),
            # Processor consumption in %
            'proc': get_proc(folder),
            # Processor temperature in °C
            'temperature': get_temperature(folder),
            # Fan speed in %
            'fan_speed': get_fan_speed(folder),
        }
        for position, folder in enumerate(self.device_folders)
    ]
def get_device_list(drm_root_folder: str) -> list:
    """Return the list of device folders exposing AMD GPU stats.

    The tree below ``drm_root_folder`` is searched for folders whose name
    matches ``CARD_REGEX``. A card is kept only when its ``DEVICE_FOLDER``
    sub-folder contains the ``GPU_PROC_PERCENT`` file.

    :param drm_root_folder: root folder of the DRM tree to scan
    :return: paths to the ``device`` folder of each usable card, in a
        deterministic (sorted) traversal order; empty if nothing is found
    """
    devices = []
    for root, dirs, _ in os.walk(drm_root_folder):
        # Sort in place so the traversal order (and therefore the amdN index
        # assigned by the caller) does not depend on the filesystem order.
        dirs.sort()
        for name in dirs:
            if not re.match(CARD_REGEX, name):
                continue
            device_path = os.path.join(root, name, DEVICE_FOLDER)
            # The busy-percent file can only exist if the device folder does,
            # so this single check is enough. os.path.isfile never raises on
            # an unreadable or vanished folder.
            if os.path.isfile(os.path.join(device_path, GPU_PROC_PERCENT)):
                devices.append(device_path)
    return devices
def get_device_name(device_folder: str) -> str:
    """Return the name to display for the GPU.

    The same constant label is returned for every card; ``device_folder``
    is accepted but not read.
    """
    generic_name = 'AMD GPU'
    return generic_name
def get_mem(device_folder: str) -> Optional[int]:
    """Return the memory consumption in %.

    The value is computed from the ``GPU_MEM_USED`` and ``GPU_MEM_TOTAL``
    files of ``device_folder`` and rounded to the nearest integer.

    :param device_folder: path to the card's ``device`` folder
    :return: used/total ratio in percent, or None when a file is missing,
        cannot be read, does not contain an integer, or the total is not
        strictly positive
    """
    total_path = os.path.join(device_folder, GPU_MEM_TOTAL)
    used_path = os.path.join(device_folder, GPU_MEM_USED)
    try:
        with open(total_path) as total_file:
            total = int(total_file.read())
        with open(used_path) as used_file:
            used = int(used_file.read())
    except (OSError, ValueError):
        # Missing/unreadable file or unexpected content: stat not available
        return None
    if total <= 0:
        # Avoid a division by zero (or a meaningless negative ratio)
        return None
    return round(used / total * 100)
def get_proc(device_folder: str) -> Optional[int]:
    """Return the processor consumption in %.

    The value is read from the ``GPU_PROC_PERCENT`` file of
    ``device_folder``.

    :param device_folder: path to the card's ``device`` folder
    :return: the integer found in the file, or None when the file is
        missing, cannot be read or does not contain an integer
    """
    try:
        with open(os.path.join(device_folder, GPU_PROC_PERCENT)) as busy_file:
            return int(busy_file.read())
    except (OSError, ValueError):
        # Missing/unreadable file or unexpected content: stat not available
        return None
def get_temperature(device_folder: str) -> Optional[int]:
    """Return the processor temperature in °C (mean of all HWMON).

    Every folder below ``device_folder`` whose name matches
    ``HWMON_REGEXP`` is searched for files matching
    ``GPU_TEMPERATURE_REGEXP``. Raw readings are averaged and divided by
    1000 before rounding.

    :param device_folder: path to the card's ``device`` folder
    :return: mean temperature, or None when no sensor could be read
    """
    readings = []
    for root, dirs, _ in os.walk(device_folder):
        for hwmon_dir in dirs:
            if not re.match(HWMON_REGEXP, hwmon_dir):
                continue
            for sensor_root, _, files in os.walk(os.path.join(root, hwmon_dir)):
                for filename in files:
                    if not re.match(GPU_TEMPERATURE_REGEXP, filename):
                        continue
                    # Join with the folder the file was actually found in:
                    # the inner walk may have descended into a sub-folder.
                    sensor_path = os.path.join(sensor_root, filename)
                    try:
                        with open(sensor_path) as sensor_file:
                            readings.append(int(sensor_file.read()))
                    except (OSError, ValueError):
                        # Skip a sensor that cannot be read or parsed instead
                        # of losing the stats of the whole card
                        continue
    if not readings:
        return None
    return round(sum(readings) / len(readings) / 1000)
def get_fan_speed(device_folder: str) -> Optional[int]:
    """Return the fan speed in %.

    No value is read for now: None is always returned and
    ``device_folder`` is not used.
    """
    return None

View File

@ -14,11 +14,11 @@ from glances.globals import nativestr
try:
import pynvml
except Exception as e:
import_nvidia_error_tag = True
nvidia_gpu_enable = False
# Display debug message if import KeyError
logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
else:
import_nvidia_error_tag = False
nvidia_gpu_enable = True
class NvidiaGPU:
@ -26,12 +26,12 @@ class NvidiaGPU:
def __init__(self):
"""Init Nvidia GPU card class."""
if import_nvidia_error_tag:
if not nvidia_gpu_enable:
self.device_handles = []
else:
try:
pynvml.nvmlInit()
self.device_handles = get_device_handles()
self.device_handles = get_device_list()
except Exception:
logger.debug("pynvml could not be initialized.")
self.device_handles = []
@ -69,7 +69,7 @@ class NvidiaGPU:
return stats
def get_device_handles():
def get_device_list():
"""Get a list of NVML device handles, one per device.
Can throw NVMLError.