占卡程序
一个用于持续占用 GPU 并进行简单算力压测的 PyTorch 脚本。
使用前说明
- 默认 `--n 40960` 会非常吃显存(尤其是多卡同时跑时)。如果你的卡不是大显存,建议先从 `--n 8192` 或 `--n 12288` 试起。
- 脚本会把所有可见 GPU 都占满。如果只想占部分卡,用 `CUDA_VISIBLE_DEVICES=...` 限制可见设备。
🚀 快速使用
python occupy_gpu.py
occupy_gpu.py:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import time
import torch
import torch.multiprocessing as mp
def worker(local_id: int, n: int, dtype: str, report: float) -> None:
    """Keep one GPU busy forever with repeated n x n matrix multiplications.

    Runs in its own process (one per visible GPU). Never returns; the parent
    kills it (or the user Ctrl-C's the whole process group).

    Args:
        local_id: Index of the GPU this process owns, within visible devices.
        n: Matrix dimension; each of the three matrices uses O(n^2) memory.
        dtype: "fp16", "bf16", or "fp32"; any other value falls back to fp32.
        report: Seconds between progress reports; <= 0 disables reporting.
    """
    torch.cuda.set_device(local_id)
    device = f"cuda:{local_id}"
    # Map the CLI dtype string to a torch dtype (anything else -> fp32).
    dt = {"fp16": torch.float16, "bf16": torch.bfloat16}.get(dtype, torch.float32)
    A = torch.randn((n, n), device=device, dtype=dt)
    B = torch.randn((n, n), device=device, dtype=dt)
    C = torch.empty((n, n), device=device, dtype=dt)
    for _ in range(5):  # warmup: let clocks/allocator settle before timing
        torch.mm(A, B, out=C)
    torch.cuda.synchronize()
    t_start = time.time()
    t_last = t_start
    it = 0
    while True:
        torch.mm(A, B, out=C)  # async launch; queues on the current stream
        it += 1
        if report > 0 and (time.time() - t_last) >= report:
            # Synchronize BEFORE reading the clock: `it` counts queued
            # (asynchronous) launches, so elapsed time must include the
            # in-flight work or it/s and TFLOPS would be overestimated.
            torch.cuda.synchronize()
            now = time.time()
            total_elapsed = now - t_start
            it_per_sec = it / total_elapsed if total_elapsed > 0 else 0.0
            # One n x n matmul is ~2*n^3 FLOPs (one multiply + one add each).
            tflops = 2 * (n**3) * it_per_sec / 1e12
            mem_alloc = torch.cuda.memory_allocated(device) / 1024**3
            mem_reserved = torch.cuda.memory_reserved(device) / 1024**3
            print(
                f"[GPU {local_id}] "
                f"iters={it} | "
                f"{it_per_sec:.2f} it/s | "
                f"{tflops:.2f} TFLOPS | "
                f"mem={mem_alloc:.2f}G/{mem_reserved:.2f}G | "
                f"time={total_elapsed:.1f}s",
                flush=True,
            )
            t_last = now
def main() -> None:
    """Parse CLI flags and launch one busy-loop worker process per visible GPU.

    Raises:
        RuntimeError: If no CUDA device is visible.
    """
    ap = argparse.ArgumentParser(
        description="Occupy every visible GPU with a matmul busy-loop."
    )
    ap.add_argument("--n", type=int, default=40960,
                    help="matrix dimension; memory grows as n^2 (default: 40960)")
    ap.add_argument("--dtype", type=str, default="fp16",
                    choices=["fp16", "bf16", "fp32"],
                    help="matmul precision (default: fp16)")
    ap.add_argument("--report", type=float, default=0.0,
                    help="seconds between progress reports; <= 0 disables (default: 0)")
    args = ap.parse_args()
    ngpu = torch.cuda.device_count()
    if ngpu <= 0:
        raise RuntimeError("No CUDA device found. Check your driver/CUDA/PyTorch installation.")
    # "spawn" avoids CUDA re-initialization problems that fork-started
    # children hit once the parent has touched the CUDA runtime.
    mp.set_start_method("spawn", force=True)
    procs = []
    for i in range(ngpu):
        p = mp.Process(target=worker, args=(i, args.n, args.dtype, args.report))
        p.start()
        procs.append(p)
    try:
        for p in procs:
            p.join()
    except KeyboardInterrupt:
        # Ctrl-C in the parent: don't leave orphaned children holding GPU
        # memory — terminate them all, then reap them.
        for p in procs:
            p.terminate()
        for p in procs:
            p.join()
# The guard is mandatory here: the "spawn" start method re-imports this
# module in every child process, and main() must not re-run there.
if __name__ == "__main__":
    main()
💡 小技巧
- 只占用部分 GPU:
  `CUDA_VISIBLE_DEVICES=0,1 python occupy_gpu.py`