Skip to content

Commit

Permalink
add gpu monitor at web app (#50)
Browse files Browse the repository at this point in the history
Thank you for taking the time to review this PR. Please handle it in the
way you see fit. I would appreciate any suggestions to help me further
improve my coding skills.


https://github.com/user-attachments/assets/4289dee8-894b-42b6-bcad-1b0653a6bde4
  • Loading branch information
TITC authored Oct 8, 2024
1 parent 2d6d252 commit d515ec5
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 16 deletions.
1 change: 1 addition & 0 deletions models/Qwen2.5-7B-Instruct-v8-k65536-65536-woft
Submodule Qwen2.5-7B-Instruct-v8-k65536-65536-woft added at ce2453
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ torch
transformers>=4.45
safetensors
psutil
accelerate
accelerate
gradio
plotly==5.9.0
40 changes: 25 additions & 15 deletions vptq/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import gradio as gr

from vptq.app_gpu import update_charts as _update_charts
from vptq.app_utils import get_chat_loop_generator

chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
Expand Down Expand Up @@ -48,21 +49,30 @@ def respond(
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
# Page layout: a live GPU usage chart and the chat interface, both built inside
# a single gr.Blocks container bound to the name `demo`.
with gr.Blocks(fill_height=True) as demo:
with gr.Row():

# Zero-argument wrapper so gr.Plot can call the chart builder with a fixed
# chart height of 200.
def update_chart():
return _update_charts(chart_height=200)

# NOTE(review): every=0.01 requests a refresh every 10 ms, and each refresh
# runs nvidia-smi in a subprocess (see update_charts in vptq/app_gpu.py) --
# confirm this rate is intended.
gpu_chart = gr.Plot(update_chart, every=0.01) # update every 0.01 seconds

with gr.Column():
# Chat UI driven by respond(); the additional inputs expose the system
# message, the max-new-tokens limit, the temperature and the top-p value.
chat_interface = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)

if __name__ == "__main__":
share = os.getenv("SHARE_LINK", None) in ["1", "true", "True"]
Expand Down
172 changes: 172 additions & 0 deletions vptq/app_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import re
import subprocess
from collections import deque

import gradio as gr
import plotly.graph_objs as go

# Rolling windows holding the 20 most recent samples; once a window is full,
# appending a new sample evicts the oldest one.
gpu_util_history = deque([], 20)  # GPU utilization, in percent
mem_usage_history = deque([], 20)  # GPU memory usage, in percent of total


def get_nvidia_smi_info():
    """Run ``nvidia-smi`` and return its standard output as text.

    Returns:
        str: The raw report printed by ``nvidia-smi``, or an empty string when
            the executable cannot be launched (for example when it is not
            installed or not on ``PATH``).
    """
    try:
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, text=True)
    except OSError:
        # A missing or non-executable nvidia-smi must not crash the periodic
        # UI refresh; an empty report makes the parser return no fields, so
        # callers fall back to their defaults.
        return ''
    return result.stdout


def parse_nvidia_smi_output(output):
    """Extract GPU statistics from the text report printed by ``nvidia-smi``.

    Each statistic is taken from the first place in the report where its
    pattern matches; statistics whose pattern does not match are simply left
    out of the result.

    Args:
        output (str): Raw ``nvidia-smi`` text. An empty (or ``None``) report
            yields an empty dict.

    Returns:
        dict: Any subset of the following keys:
            ``gpu_util`` (int, percent),
            ``mem_used`` / ``mem_total`` (int, MiB),
            ``mem_percent`` (float, percent of total memory in use),
            ``temp`` (int, degrees Celsius),
            ``power_used`` / ``power_max`` (int, watts),
            ``gpu_clock`` / ``mem_clock`` (int, MHz).
    """
    gpu_info = {}
    if not output:
        return gpu_info

    utilization = re.search(r'(\d+)%\s+Default', output)
    # Columns are padded for alignment, so allow any spacing around the slash.
    mem_used = re.search(r'(\d+)MiB\s*/\s*(\d+)MiB', output)
    # Reject digits glued to ':' or to other word characters so that a hex
    # Bus-Id fragment such as "00000000:3C:00.0" is not read as a temperature.
    temp = re.search(r'(?<![\w:])(\d+)C(?![\w:])', output)
    # Accept both "43W / 300W" and "43 / 300 W".
    power = re.search(r'(\d+)\s*W?\s*/\s*(\d+)\s*W', output)
    gpu_clock = re.search(r'(\d+)MHz\s+MEM\s+(\d+)MHz', output)

    if utilization:
        gpu_info['gpu_util'] = int(utilization.group(1))
    if mem_used:
        gpu_info['mem_used'] = int(mem_used.group(1))
        gpu_info['mem_total'] = int(mem_used.group(2))
        if gpu_info['mem_total'] > 0:
            gpu_info['mem_percent'] = gpu_info['mem_used'] / gpu_info['mem_total'] * 100
        else:
            # A reported total of 0 MiB would otherwise divide by zero.
            gpu_info['mem_percent'] = 0.0
    if temp:
        gpu_info['temp'] = int(temp.group(1))
    if power:
        gpu_info['power_used'] = int(power.group(1))
        gpu_info['power_max'] = int(power.group(2))
    if gpu_clock:
        gpu_info['gpu_clock'] = int(gpu_clock.group(1))
        gpu_info['mem_clock'] = int(gpu_clock.group(2))

    return gpu_info


def update_charts(chart_height: int = 200) -> go.Figure:
    """
    Sample the GPU once, record the sample, and redraw the usage chart.

    The new GPU utilization and memory usage percentages are appended to the
    module-level history buffers, and the figure plots the full contents of
    both buffers as two labelled lines.

    Args:
        chart_height (int, optional): Height given to the figure layout.
            Defaults to 200.
    Returns:
        plotly.graph_objs.Figure: Figure with one trace for GPU utilization
            and one for memory usage.
    """
    stats = parse_nvidia_smi_output(get_nvidia_smi_info())

    # Missing readings are recorded as 0 so both lines keep the same length.
    gpu_util_history.append(round(stats.get('gpu_util', 0), 1))
    mem_usage_history.append(round(stats.get('mem_percent', 0), 1))

    def _history_trace(history, label):
        # Each point is drawn with its own value as a text label above it.
        return go.Scatter(
            y=list(history),
            mode='lines+markers+text',
            name=label,
            text=list(history),
            textposition='top center',
        )

    traces = [
        _history_trace(gpu_util_history, 'GPU Utilization (%)'),
        _history_trace(mem_usage_history, 'Memory Usage (%)'),
    ]

    # The x axis carries no information (sample order only), so it is hidden;
    # the y range leaves headroom around 0..100 for the point labels.
    x_axis = dict(title=None, showticklabels=False, ticks='')
    y_axis = dict(title='Percentage (%)', range=[-5, 110])
    chart_layout = go.Layout(
        xaxis=x_axis,
        yaxis=y_axis,
        height=chart_height,
        margin=dict(l=10, r=10, t=0, b=0),
    )

    return go.Figure(data=traces, layout=chart_layout)


def mem_bar(used: float, total: float) -> str:
    """
    Generates a memory usage bar.

    The bar is always 50 characters wide: ``|`` for the used share and spaces
    for the rest, followed by the used/total figures with three decimals.

    Args:
        used (float): The amount of memory used in GiB.
        total (float): The total amount of memory available in GiB. A total of
            zero (or less) produces an empty bar instead of dividing by zero.
    Returns:
        str: A string representing the memory usage bar in HTML format.
    """
    bar_length = 50
    if total > 0:
        # Clamp so that out-of-range readings cannot shrink or overflow the bar.
        used_bars = max(0, min(bar_length, int(bar_length * used / total)))
    else:
        # No memory reading available (for example the report failed to parse).
        used_bars = 0
    bar = '|' * used_bars + ' ' * (bar_length - used_bars)
    return f"<span style='color: green;'>MEM[{bar}{used:.3f}Gi/{total:.3f}Gi]</span>"


def refresh_gpu_data():
    """
    Sample the GPU once and render the readings as a monospace HTML snippet.

    The device index, device name, PCIe link line and fan value in the snippet
    are fixed text; only the clocks, temperature, power, utilization and
    memory figures come from the parsed ``nvidia-smi`` report.

    Returns:
        str: An HTML ``<div>`` showing GPU and memory clock, temperature,
            power draw, GPU utilization and a memory usage bar. Clock,
            temperature and power readings that could not be parsed are
            shown as ``N/A``.
    """
    stats = parse_nvidia_smi_output(get_nvidia_smi_info())

    core_mhz = stats.get('gpu_clock', 'N/A')
    memory_mhz = stats.get('mem_clock', 'N/A')
    celsius = stats.get('temp', 'N/A')
    watts_now = stats.get('power_used', 'N/A')
    watts_cap = stats.get('power_max', 'N/A')
    busy_percent = stats.get('gpu_util', 0)

    # nvidia-smi reports memory in MiB; the bar is labelled in GiB.
    used_gib = stats.get('mem_used', 0) / 1024
    total_gib = stats.get('mem_total', 0) / 1024

    fragments = [
        "<div style='font-family: monospace;'>",
        "<b style='color: yellow;'>Device 0</b> ",
        "[<span style='color: cyan;'>NVIDIA A100 80GB PCIe</span>] ",
        "PCIe GEN 4@16x RX: <b>0.000 KiB/s</b> TX: <b>0.000 KiB/s</b><br>",
        f"GPU <b>{core_mhz}MHz</b> MEM <b>{memory_mhz}MHz</b> ",
        f"TEMP <b style='color: orange;'>{celsius}°C</b> FAN <b>N/A%</b> ",
        f"POW <b style='color: red;'>{watts_now} / {watts_cap} W</b><br>",
        f"GPU[<b>{busy_percent}%</b>] {mem_bar(used_gib, total_gib)}",
        "</div>",
    ]
    return "".join(fragments)


def initialize_history():
    """
    Pre-fill both history buffers with 20 back-to-back GPU samples.

    Every sample runs ``nvidia-smi`` again; readings that cannot be parsed are
    stored as 0.
    """
    samples_left = 20
    while samples_left > 0:
        stats = parse_nvidia_smi_output(get_nvidia_smi_info())
        gpu_util_history.append(round(stats.get('gpu_util', 0), 1))
        mem_usage_history.append(round(stats.get('mem_percent', 0), 1))
        samples_left -= 1


# Standalone demo: running this module directly serves a page containing only
# the GPU text readout and the usage chart.
if __name__ == "__main__":
# Refresh period, in seconds, passed to both components below (0.01 = 10 ms).
# NOTE(review): every refresh launches nvidia-smi in a subprocess -- confirm
# that a 10 ms period is intended.
time_interval = 0.01
# create the GPU information display and chart
with gr.Blocks() as demo:
# NOTE(review): this comment used to say the HTML readout was temporarily
# commented out because of flickering, but the line below is active --
# confirm whether it should be disabled.
gpu_info_display = gr.HTML(refresh_gpu_data, every=time_interval, elem_id="gpu_info")
# Pre-fill the history buffers (20 samples) before the chart is created.
initialize_history()
gpu_chart = gr.Plot(update_charts, every=time_interval)
# Fix the height of the #gpu_info element and hide its overflow to avoid the
# up and down movement of the GPU information.
demo.css = """
#gpu_info {
height: 100px;
overflow: hidden;
}
"""
demo.launch()

0 comments on commit d515ec5

Please sign in to comment.