201 lines
6.9 KiB
Markdown
201 lines
6.9 KiB
Markdown
You're currently on the CT 108 (openclaw) console. Here is the **exact, complete script** that was executed on your PVE host. You can save it and re-run it on any fresh PVE node that has RTX 3060 GPUs and the CUDA 13.0 local repo already present: [192.168.10.2](https://192.168.10.2:8006/#v1:0:=lxc%2F108:4:=jsconsole:=contentRootdir::::=consolejs:)
|
|
|
|
***
|
|
|
|
```bash
|
|
#!/usr/bin/env bash
# ============================================================
# PVE NVIDIA RTX 3060 Driver Setup Script
# Tested on: Proxmox VE 9.1.6 | Kernel 6.14.11-5-pve
# NVIDIA Driver: 580.76.05 | CUDA: 13.0
# ============================================================

# Strict mode: abort on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# From here on, everything written to stdout or stderr is shown on the
# console and also appended to the log file.
LOG="/root/pve-nvidia-install.log"
exec > >(tee -a "${LOG}") 2>&1

printf '=== PVE NVIDIA Setup Started: %s ===\n' "$(date)"
|
|
|
|
# ------------------------------------------------------------
# STEP 1 — Environment audit (read-only)
# Prints the Proxmox package versions, the running kernel release and
# any NVIDIA PCI devices so they end up in the log. Changes nothing.
# ------------------------------------------------------------
printf '\n'
printf '%s\n' "--- Environment Info ---"
pveversion -v
uname -r
# A missing GPU is reported but does not stop the script.
if ! lspci | grep -i nvidia; then
  echo "WARNING: No NVIDIA GPU detected"
fi
|
|
|
|
# ------------------------------------------------------------
# STEP 2 — Blacklist nouveau
# Overwrites the blacklist file on every run, then regenerates the
# initramfs of every installed kernel.
# ------------------------------------------------------------
printf '\n'
printf '%s\n' "--- Blacklisting nouveau ---"
nouveau_blacklist=/etc/modprobe.d/blacklist-nouveau.conf
printf '%s\n' \
  'blacklist nouveau' \
  'options nouveau modeset=0' \
  > "${nouveau_blacklist}"
printf '%s\n' "Written: /etc/modprobe.d/blacklist-nouveau.conf"

# Rebuild initramfs so blacklist is baked in on next boot
update-initramfs -u -k all
|
|
|
|
# ------------------------------------------------------------
# STEP 3 — NVIDIA modprobe options (headless server, minimal)
# Overwrites /etc/modprobe.d/nvidia.conf with two module options.
# ------------------------------------------------------------
printf '\n'
printf '%s\n' "--- Writing /etc/modprobe.d/nvidia.conf ---"
printf '%s\n' \
  '# Headless PVE host - minimal NVIDIA options' \
  'options nvidia-drm modeset=1' \
  'options nvidia NVreg_UsePageAttributeTable=1' \
  > /etc/modprobe.d/nvidia.conf
|
|
|
|
# ------------------------------------------------------------
# STEP 4 — Add NVIDIA modules to /etc/modules (load on boot)
# ------------------------------------------------------------
echo ""
echo "--- Adding nvidia modules to /etc/modules ---"

#######################################
# Make sure every named kernel module has its own entry in a modules file.
# Each module is checked individually against whole entries, so a comment
# that merely mentions "nvidia", or a file that already lists only some of
# the modules, no longer causes the remaining modules to be skipped.
# Arguments: $1   - path of the modules file (created if missing)
#            $2.. - module names to ensure
# Outputs:   one "Added" / "already in" line per module on stdout
# Returns:   0 on success, 1 if the file could not be written
#######################################
ensure_modules_listed() {
  local modules_file=$1
  shift
  local module
  for module in "$@"; do
    # Match the module name as the first word of a line (optional leading
    # whitespace, followed by whitespace or end of line).
    if grep -Eq "^[[:space:]]*${module}([[:space:]]|\$)" "${modules_file}" 2>/dev/null; then
      echo "${module} already in ${modules_file} — skipped"
      continue
    fi
    # If the file does not end with a newline, add one first so the new
    # entry is not glued onto the last existing line.
    if [[ -s "${modules_file}" && -n "$(tail -c 1 -- "${modules_file}")" ]]; then
      printf '\n' >> "${modules_file}" || return 1
    fi
    printf '%s\n' "${module}" >> "${modules_file}" || return 1
    echo "Added ${module} to ${modules_file}"
  done
}

ensure_modules_listed /etc/modules nvidia nvidia_uvm nvidia_drm
|
|
|
|
# ------------------------------------------------------------
# STEP 5 — Install open kernel DKMS module
#   (nvidia-kernel-open-dkms provides nvidia-kernel-580.65.06
#    which satisfies the nvidia-driver dependency)
#   NOTE(review): the script header lists driver 580.76.05 — confirm
#   which version string is correct.
#   NOTE: Temporarily hides the 6.17 build dirs so DKMS skips them
#   (NVIDIA 580.x is incompatible with kernel 6.17 API)
# ------------------------------------------------------------
echo ""
echo "--- Installing nvidia-kernel-open-dkms ---"

#######################################
# Rename the "build" symlink of every installed 6.17.x kernel to
# "build.disabled" so DKMS finds no headers for those kernels.
# Outputs: one "Temporarily hidden" line per renamed symlink
#######################################
hide_incompatible_builds() {
  local link
  for link in /lib/modules/6.17.*/build; do
    # Also skips the unexpanded pattern when no 6.17 kernel is installed.
    [[ -L "${link}" ]] || continue
    # -T: treat the destination as a plain name. Without it, a leftover
    # build.disabled symlink (which points at a directory) would make mv
    # move "build" *into* the headers directory.
    mv -T -- "${link}" "${link}.disabled"
    echo "Temporarily hidden: ${link}"
  done
}

#######################################
# Undo hide_incompatible_builds. Tests with -L, not -f: build.disabled is
# a symlink to a headers directory, so -f is false and the symlink would
# never be put back.
# Outputs: one "Restored" line per symlink put back
#######################################
restore_incompatible_builds() {
  local hidden link
  for hidden in /lib/modules/6.17.*/build.disabled; do
    [[ -L "${hidden}" ]] || continue
    link="${hidden%.disabled}"
    if [[ -e "${link}" || -L "${link}" ]]; then
      echo "WARNING: ${link} already exists; leaving ${hidden} untouched"
      continue
    fi
    mv -T -- "${hidden}" "${link}"
    echo "Restored: ${link}"
  done
}

# Restore the symlinks on every exit path: under `set -e` a failing apt
# would otherwise abort the script with the headers still hidden.
trap restore_incompatible_builds EXIT
hide_incompatible_builds

DEBIAN_FRONTEND=noninteractive apt install -y nvidia-kernel-open-dkms

# Fix any dpkg half-configured state while 6.17 is still hidden.
# Best effort: a failure is reported but does not stop the script.
if ! dpkg --configure -a 2>&1; then
  echo "WARNING: dpkg --configure -a reported errors (continuing)"
fi

restore_incompatible_builds
trap - EXIT
|
|
|
|
# ------------------------------------------------------------
# STEP 6 — Install full NVIDIA driver stack
#   (brings nvidia-smi, libcuda1, nvidia-persistenced,
#    nvidia-driver-cuda, and all userspace libs)
# ------------------------------------------------------------
printf '\n'
printf '%s\n' "--- Installing nvidia-driver + nvidia-driver-cuda ---"

# Packages requested explicitly; apt resolves the remaining dependencies.
driver_packages=(
  nvidia-driver
  nvidia-driver-cuda
  nvidia-modprobe
  nvidia-smi
  libnvidia-sandboxutils
)
DEBIAN_FRONTEND=noninteractive apt install -y "${driver_packages[@]}"
|
|
|
|
# ------------------------------------------------------------
# STEP 7 — DKMS exclusion for kernel 6.17.x
#   Prevents future build failures when apt upgrades
#
#   Uses BUILD_EXCLUSIVE_KERNEL, the dkms.conf directive that restricts
#   which kernels a module is built for. It is a regex of kernels that
#   ARE allowed, so the pattern below matches every kernel version
#   except 6.17.x.
#   NOTE(review): DKMS reads overrides from /etc/dkms/<module>.conf, where
#   <module> is the name shown by `dkms status`. Confirm that name is
#   "nvidia-kernel-open"; otherwise this file is ignored.
# ------------------------------------------------------------
echo ""
echo "--- Writing DKMS kernel exclusion for 6.17.x ---"

# Alternatives, in order: single-digit major other than 6; multi-digit
# major; major 6 with a minor that is not 17.
NVIDIA_BUILD_KERNEL_REGEX='^([0-57-9]\.|[0-9]{2,}\.|6\.([0-9]|1[0-689]|[2-9][0-9]|[0-9]{3,})\.)'

mkdir -p /etc/dkms
# Unquoted delimiter on purpose: the regex variable must be expanded.
cat > /etc/dkms/nvidia-kernel-open.conf << EOF
# NVIDIA 580.x is incompatible with kernel 6.17 API changes.
# Remove this file once a newer NVIDIA driver supporting 6.17 is available.
# Build only for kernels matching this regex (everything except 6.17.x).
BUILD_EXCLUSIVE_KERNEL="${NVIDIA_BUILD_KERNEL_REGEX}"
EOF
echo "Written: /etc/dkms/nvidia-kernel-open.conf"
|
|
|
|
# ------------------------------------------------------------
# STEP 8 — Create nvidia-modprobe device nodes now (no reboot needed)
# ------------------------------------------------------------
printf '\n'
printf '%s\n' "--- Creating /dev/nvidia* device nodes ---"
# Best effort: a failure here is deliberately ignored so the verification
# step below still runs and reports what is actually present.
nvidia-modprobe -u -c=0 || :
|
|
|
|
# ------------------------------------------------------------
# STEP 9 — Verify
# Prints loaded modules, device nodes, DKMS status and nvidia-smi output,
# then a summary of the files this script wrote. Missing modules, missing
# device nodes and a failing nvidia-smi only produce a WARNING line.
# ------------------------------------------------------------
echo ""
echo "--- Verification ---"
echo "=== lsmod | grep nvidia ==="
lsmod | grep -E '^nvidia' || echo "WARNING: No nvidia modules loaded (may need reboot)"

echo ""
echo "=== /dev/nvidia* devices ==="
# Expand the globs into an array first. Passing unmatched patterns to ls
# makes it exit non-zero even when GPU nodes exist (for example when
# /dev/nvidia-caps is empty), which would print a misleading "not found"
# warning.
shopt -s nullglob
nvidia_nodes=(/dev/nvidia* /dev/nvidia-caps/*)
shopt -u nullglob
if (( ${#nvidia_nodes[@]} > 0 )); then
  ls -la -- "${nvidia_nodes[@]}" || echo "WARNING: could not list /dev/nvidia* nodes"
else
  echo "WARNING: No /dev/nvidia* found"
fi

echo ""
echo "=== DKMS status ==="
dkms status

echo ""
echo "=== nvidia-smi ==="
nvidia-smi || echo "WARNING: nvidia-smi failed"

echo ""
echo "=== Files modified ==="
echo "  /etc/modprobe.d/blacklist-nouveau.conf"
echo "  /etc/modprobe.d/nvidia.conf"
echo "  /etc/modules (appended: nvidia, nvidia_uvm, nvidia_drm)"
echo "  /etc/dkms/nvidia-kernel-open.conf"
echo "  /boot/initrd.img-* (regenerated by update-initramfs)"

echo ""
echo "=== PVE NVIDIA Setup Complete: $(date) ==="
echo "Log saved to: $LOG"
|
|
```
|
|
|
|
***
|
|
|
|
### How to use it on a fresh PVE node
|
|
|
|
```bash
|
|
# 1. Paste the script into a new file and save it
nano /root/pve-nvidia-setup.sh

# 2. Mark the file as executable
chmod +x /root/pve-nvidia-setup.sh

# 3. Run it (output is shown and also appended to the log)
/root/pve-nvidia-setup.sh

# 4. Review the log at any time
cat /root/pve-nvidia-install.log
|
|
```
|
|
|
|
### Prerequisites before running
|
|
|
|
This script assumes the **CUDA 13.0 local repo** is already present at `/var/cuda-repo-debian12-13-0-local/` (as it was on your node). If starting completely fresh on a new PVE node, you'd first need to download and install the CUDA local repo `.deb` from NVIDIA, then run this script.
|
|
|
|
### What the script does NOT do (by design)
|
|
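- Does **not** install, upgrade, or remove any Proxmox packages or kernels (it does regenerate the initramfs for all installed kernels and temporarily renames the 6.17 headers `build` symlink)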
|
|
- Does **not** modify any LXC configs
|
|
- Does **not** install anything inside containers
|
|
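- Does **not** require a reboot on your node (the modules were already loaded); on a fresh node the `nouveau` blacklist only takes effect on the next boot, so a reboot may be needed before the NVIDIA modules load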