Skip to content

Instantly share code, notes, and snippets.

@kashifulhaque
Created November 4, 2024 13:00
Show Gist options
  • Save kashifulhaque/73a3f65dc203687498f186af5a54c516 to your computer and use it in GitHub Desktop.
Save kashifulhaque/73a3f65dc203687498f186af5a54c516 to your computer and use it in GitHub Desktop.

Revisions

  1. kashifulhaque created this gist Nov 4, 2024.
    97 changes: 97 additions & 0 deletions gpu-compatibility-checker.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,97 @@
    import torch
    import numpy as np

    def check_gpu_capabilities():
        """Print a capability report for every visible CUDA device.

        For each device this prints its name, compute capability and total
        memory, whether FP16 / BF16 are expected to work (judged purely from
        the compute capability), the outcome of a tiny FP16 smoke test, and a
        rough timing of one large element-wise addition in FP32 and, when
        supported, FP16.

        Prints a notice and returns early when CUDA is unavailable. Nothing is
        returned; every result goes to stdout.
        """
        print("=== GPU Information ===")

        if not torch.cuda.is_available():
            print("❌ CUDA is not available on this system")
            return

        device_count = torch.cuda.device_count()
        print(f"✓ Found {device_count} CUDA device(s)")

        for i in range(device_count):
            # The plain index is an accepted device handle; no need to build a
            # torch.cuda.device context-manager object just to look it up.
            properties = torch.cuda.get_device_properties(i)

            print(f"\nDevice {i}: {properties.name}")
            print(f"Compute Capability: {properties.major}.{properties.minor}")
            print(f"Total Memory: {properties.total_memory / 1024**3:.2f} GB")

            # Heuristic used by this script: FP16 from compute capability 5.3,
            # BF16 from compute capability 8.0.
            fp16_supported = (properties.major, properties.minor) >= (5, 3)
            print(f"FP16 Support: {'✓' if fp16_supported else '❌'}")

            bf16_supported = properties.major >= 8
            print(f"BF16 Support: {'✓' if bf16_supported else '❌'}")

            if fp16_supported:
                try:
                    a = torch.ones((2, 2), dtype=torch.float16, device=f'cuda:{i}')
                    b = a + a
                    # Kernel failures surface asynchronously; wait for the
                    # device so they are raised inside this try block.
                    torch.cuda.synchronize(i)
                    del a, b
                    print("✓ FP16 Operations verified")
                except RuntimeError as e:
                    print(f"❌ FP16 Operations failed: {str(e)}")

            print("\nRunning quick memory bandwidth test...")
            torch.cuda.empty_cache()
            size = 100_000_000

            try:
                fp32_time = _time_elementwise_add(i, size, torch.float32)
                fp16_time = (
                    _time_elementwise_add(i, size, torch.float16)
                    if fp16_supported
                    else None
                )
            except RuntimeError as e:
                # Covers out-of-memory on small GPUs; keep reporting the
                # remaining devices instead of aborting the whole run.
                print(f"❌ Memory bandwidth test failed: {str(e)}")
                continue

            print(f"FP32 operation time: {fp32_time:.2f}ms")
            if fp16_time is not None:
                print(f"FP16 operation time: {fp16_time:.2f}ms")
                # Guard against a zero reading from the event timer.
                if fp16_time > 0:
                    print(f"FP16 speedup: {fp32_time/fp16_time:.2f}x")


    def _time_elementwise_add(device_index, size, dtype):
        """Return the milliseconds one ``x + x`` over ``size`` elements takes.

        The measurement runs with ``device_index`` as the current CUDA device
        so that the timing events are recorded on the same device that
        executes the addition.
        """
        with torch.cuda.device(device_index):
            x = torch.randn(size, dtype=dtype, device=f'cuda:{device_index}')

            # Untimed warm-up so one-time launch overhead is not measured.
            warmup = x + x
            del warmup

            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)

            torch.cuda.synchronize()
            start.record()
            y = x + x
            end.record()
            torch.cuda.synchronize()
            del y
            return start.elapsed_time(end)
    def get_training_recommendations():
        """Print a suggested ``TrainingArguments`` snippet for the current GPU.

        Does nothing when CUDA is unavailable. Otherwise the current CUDA
        device's compute capability decides which snippet is printed: the
        FP16-enabled one from compute capability 5.3 upwards, the FP32 one
        below that. Output goes to stdout; nothing is returned.
        """
        if not torch.cuda.is_available():
            return

        props = torch.cuda.get_device_properties(torch.cuda.current_device())

        # Tuple comparison: true for 5.3 and above, false for anything older.
        half_precision_ok = (props.major, props.minor) >= (5, 3)

        if half_precision_ok:
            advice = """
    Recommended PyTorch training configuration:
    training_args = TrainingArguments(
    fp16=True, # Enable mixed precision training
    fp16_opt_level='O1', # Mixed precision optimization level
    fp16_backend='auto', # Let PyTorch choose the backend
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    )
    """
        else:
            advice = """
    Recommended PyTorch training configuration:
    training_args = TrainingArguments(
    fp16=False, # FP16 not supported, using FP32
    per_device_train_batch_size=4, # Smaller batch size due to memory constraints
    gradient_accumulation_steps=8,
    )
    """

        print("\n=== Training Recommendations ===")
        print(advice)

    if __name__ == "__main__":
        # Script entry point: hardware report first, then the training advice.
        for report in (check_gpu_capabilities, get_training_recommendations):
            report()