GPU Compatibility checker
import torch


def check_gpu_capabilities():
    print("=== GPU Information ===")

    if not torch.cuda.is_available():
        print("❌ CUDA is not available on this system")
        return

    device_count = torch.cuda.device_count()
    print(f"✓ Found {device_count} CUDA device(s)")

    for i in range(device_count):
        properties = torch.cuda.get_device_properties(i)
        print(f"\nDevice {i}: {properties.name}")
        print(f"Compute Capability: {properties.major}.{properties.minor}")
        print(f"Total Memory: {properties.total_memory / 1024**3:.2f} GB")

        # FP16 is supported on compute capability 5.3 and newer
        fp16_supported = properties.major >= 6 or (properties.major == 5 and properties.minor >= 3)
        print(f"FP16 Support: {'✓' if fp16_supported else '❌'}")

        # BF16 requires compute capability 8.0 or newer (Ampere and later)
        bf16_supported = properties.major >= 8
        print(f"BF16 Support: {'✓' if bf16_supported else '❌'}")

        if fp16_supported:
            try:
                # Smoke test: run a tiny FP16 op on this device
                a = torch.ones((2, 2), dtype=torch.float16, device=f'cuda:{i}')
                b = a + a
                print("✓ FP16 Operations verified")
            except RuntimeError as e:
                print(f"❌ FP16 Operations failed: {str(e)}")

        print("\nRunning quick memory bandwidth test...")
        torch.cuda.empty_cache()

        size = 100_000_000
        x = torch.randn(size, dtype=torch.float32, device=f'cuda:{i}')
        torch.cuda.synchronize()

        # Time a simple elementwise add in FP32 using CUDA events
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        start.record()
        y = x + x
        end.record()
        torch.cuda.synchronize()
        fp32_time = start.elapsed_time(end)
        print(f"FP32 operation time: {fp32_time:.2f}ms")

        if fp16_supported:
            # Repeat the same add in FP16 and compare against FP32
            x_half = torch.randn(size, dtype=torch.float16, device=f'cuda:{i}')
            torch.cuda.synchronize()

            start.record()
            y_half = x_half + x_half
            end.record()
            torch.cuda.synchronize()
            fp16_time = start.elapsed_time(end)

            print(f"FP16 operation time: {fp16_time:.2f}ms")
            print(f"FP16 speedup: {fp32_time / fp16_time:.2f}x")


def get_training_recommendations():
    if not torch.cuda.is_available():
        return

    device = torch.cuda.current_device()
    properties = torch.cuda.get_device_properties(device)

    print("\n=== Training Recommendations ===")
    if properties.major >= 6 or (properties.major == 5 and properties.minor >= 3):
        print("""
Recommended training configuration (Hugging Face TrainingArguments):
training_args = TrainingArguments(
    fp16=True,                       # Enable mixed precision training
    fp16_opt_level='O1',             # Mixed precision optimization level
    fp16_backend='auto',             # Let PyTorch choose the backend
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
)
""")
    else:
        print("""
Recommended training configuration (Hugging Face TrainingArguments):
training_args = TrainingArguments(
    fp16=False,                      # FP16 not supported, using FP32
    per_device_train_batch_size=4,   # Smaller batch size due to memory constraints
    gradient_accumulation_steps=8,
)
""")


if __name__ == "__main__":
    check_gpu_capabilities()
    get_training_recommendations()
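
The script derives FP16/BF16 support from hard-coded compute-capability thresholds. As a cross-check, a minimal sketch below uses PyTorch's own helpers, torch.cuda.get_device_capability() and torch.cuda.is_bf16_supported(), which are standard torch.cuda APIs in reasonably recent releases but are not part of the gist above:

import torch

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        # Same (major, minor) pair the script reads from get_device_properties()
        major, minor = torch.cuda.get_device_capability(i)
        print(f"cuda:{i} compute capability: {major}.{minor}")

    # Reports BF16 support for the current device
    print(f"BF16 supported (current device): {torch.cuda.is_bf16_supported()}")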