Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion rhel9/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
# Avoid dependency of container-toolkit for driver container
ENV NVIDIA_VISIBLE_DEVICES=void

# Fabric manager fabric mode, default is 0 (full-passthrough)
ARG FABRIC_MANAGER_FABRIC_MODE=0
ENV FABRIC_MANAGER_FABRIC_MODE=$FABRIC_MANAGER_FABRIC_MODE

ADD install.sh /tmp/

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
Expand All @@ -74,7 +78,19 @@ RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
cd drivers && \
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; fi
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
versionArray=(${DRIVER_VERSION//./ }); \
DRIVER_BRANCH=${versionArray[0]}; \
dnf install git -y && \
dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
dnf module enable -y nvidia-driver:${DRIVER_BRANCH}-dkms && \
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1 nvidia-fabric-manager-devel-${DRIVER_VERSION}-1 libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1 jsoncpp-devel gcc-c++ make && \
git clone https://github.com/mresvanis/Fabric-Manager-Client.git && \
cd Fabric-Manager-Client && \
git checkout fix-ignoring-unix-socket && \
make fmpm && \
cp fmpm /usr/bin/ && \
chmod +x /usr/bin/fmpm; fi

# Fetch the installer, fabricmanager, libnvidia-nscq, libnvsdm, imex packages
RUN sh /tmp/install.sh extrapkgsinstall
Expand Down
111 changes: 91 additions & 20 deletions rhel9/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ RHEL_MAJOR_VERSION=9
RHEL_MINOR_VERSION=${RHEL_MINOR_VERSION:-""}
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
MODPROBE_CONFIG_DIR="/etc/modprobe.d"
FABRIC_MANAGER_FABRIC_MODE=${FABRIC_MANAGER_FABRIC_MODE:-0}

DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
echo "DRIVER_ARCH is $DRIVER_ARCH"
Expand Down Expand Up @@ -305,6 +306,87 @@ _ensure_nvlink5_prerequisites() (
done
)

# Apply fabric-mode specific settings to a Fabric Manager configuration file.
#
# The file is only touched when FABRIC_MANAGER_FABRIC_MODE is "1"; for any
# other value the function is a no-op and returns 0.
#
# Globals:   FABRIC_MANAGER_FABRIC_MODE (read)
# Arguments: $1 - path to the fabric manager configuration file
#            $2 - UNIX socket path written to the UNIX_SOCKET_PATH and
#                 FM_CMD_UNIX_SOCKET_PATH keys
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success or when nothing had to be done, non-zero when the
#            configuration file is missing or cannot be rewritten
_configure_fabric_manager_config() {
    local fm_config_file="$1"
    local fmpm_socket_path="$2"
    local fabric_mode="${FABRIC_MANAGER_FABRIC_MODE:-0}"
    local escaped_socket_path

    if [ "${fabric_mode}" != "1" ]; then
        return 0
    fi

    if [ ! -f "${fm_config_file}" ]; then
        echo "NVIDIA fabric manager configuration file not found: ${fm_config_file}" >&2
        return 1
    fi

    # Backslash-escape the characters that are special inside the replacement
    # part of the 's|...|...|' commands below ('\', '&' and the '|' delimiter)
    # so that the socket path is always written literally.
    escaped_socket_path=$(printf '%s' "${fmpm_socket_path}" | sed 's/[\\&|]/\\&/g') || return 1

    echo "Updating NVIDIA fabric manager configuration to fabric mode ${fabric_mode}..."
    # Anchor at line start so that only the real key is rewritten, not a
    # comment mentioning it or another key that merely ends in FABRIC_MODE.
    sed -i "s/^FABRIC_MODE=.*/FABRIC_MODE=${fabric_mode}/" "${fm_config_file}" || return 1

    echo "Updating NVIDIA fabric manager configuration to use a UNIX socket instead of TCP: ${fmpm_socket_path}"
    sed -i \
        -e "s|^UNIX_SOCKET_PATH=.*|UNIX_SOCKET_PATH=${escaped_socket_path}|" \
        -e "s|^FM_CMD_UNIX_SOCKET_PATH=.*|FM_CMD_UNIX_SOCKET_PATH=${escaped_socket_path}|" \
        "${fm_config_file}"
}

# Configure and start the NVIDIA fabric manager daemon when the system has
# NVLink switches.
#
# NVLink5+ systems are checked first and started through
# nvidia-fabricmanager-start.sh; otherwise, if NVLink4 (or below) switches are
# detected, nv-fabricmanager is started directly. When neither check succeeds
# nothing is started and the function returns 0.
#
# Arguments: $1 - UNIX socket path handed to _configure_fabric_manager_config
# Outputs:   progress messages on stdout
# Returns:   1 if the NVLink5 prerequisites or the configuration update fail,
#            otherwise the exit status of the command that starts the daemon
_setup_fabric_manager() {
    local fmpm_socket_path="$1"
    # Keep every helper variable local so nothing leaks into the caller's scope.
    local fm_config_file=/usr/share/nvidia/nvswitch/fabricmanager.cfg
    local fm_pid_file nvlsm_config_file nvlsm_pid_file

    if _assert_nvlink5_system; then
        _ensure_nvlink5_prerequisites || return 1

        # Do not start the daemon with a configuration that failed to update.
        _configure_fabric_manager_config "${fm_config_file}" "${fmpm_socket_path}" || return 1

        fm_pid_file=/var/run/nvidia-fabricmanager/nv-fabricmanager.pid
        nvlsm_config_file=/usr/share/nvidia/nvlsm/nvlsm.conf
        nvlsm_pid_file=/var/run/nvidia-fabricmanager/nvlsm.pid

        echo "Starting NVIDIA fabric manager daemon for NVLink5+..."

        /usr/bin/nvidia-fabricmanager-start.sh --mode start \
            --fm-config-file "${fm_config_file}" \
            --fm-pid-file "${fm_pid_file}" \
            --nvlsm-config-file "${nvlsm_config_file}" \
            --nvlsm-pid-file "${nvlsm_pid_file}"

    # If not a NVLink5+ switch, check for the presence of NVLink4 (or below) switches
    elif _assert_nvswitch_system; then
        _configure_fabric_manager_config "${fm_config_file}" "${fmpm_socket_path}" || return 1

        echo "Starting NVIDIA fabric manager daemon..."
        nv-fabricmanager -c "${fm_config_file}"
    fi
}

# Capture GPU PCI address to physical module ID mapping and persist to JSON file.
#
# The "Module ... Id" and "Bus ... Id" lines of `nvidia-smi -q` are paired up:
# every Bus Id line that follows a Module Id line yields one
# "<pci bus id>": "<module id>" entry in a flat JSON object.
#
# Arguments: $1 - (optional) output file; defaults to
#                 /run/nvidia-fabricmanager/gpu-pci-module-mapping.json
# Outputs:   progress and warning messages on stdout
# Returns:   0 when the mapping was written or when it could not be retrieved
#            (a warning is printed in that case), 1 if the output file cannot
#            be created
_capture_gpu_mapping() {
    local mapping_file="${1:-/run/nvidia-fabricmanager/gpu-pci-module-mapping.json}"
    local gpu_mapping json_entries="" module_id="" pci_id line
    # Regexes live in variables so that no shell quoting is needed in [[ =~ ]].
    local module_re='Module Id.*: ([0-9]+)'
    local bus_re='Bus Id.*: ([0-9A-Fa-f:.]+)'

    echo "Capturing GPU PCI to Module ID mapping..."
    if ! command -v nvidia-smi >/dev/null 2>&1; then
        echo "Warning: nvidia-smi not available for GPU mapping"
        return 0
    fi

    # grep exits non-zero when nothing matches; tolerate that here so the
    # warning below is reached instead of aborting a caller running 'set -e'.
    gpu_mapping=$(nvidia-smi -q | grep -E "(Module|Bus).*Id") || true
    if [ -z "$gpu_mapping" ]; then
        echo "Warning: Could not retrieve GPU PCI to Module ID mapping"
        return 0
    fi

    echo "$gpu_mapping"

    # Parse and convert to JSON format
    while IFS= read -r line; do
        if [[ "$line" =~ $module_re ]]; then
            module_id="${BASH_REMATCH[1]}"
        elif [[ "$line" =~ $bus_re ]] && [ -n "$module_id" ]; then
            pci_id="${BASH_REMATCH[1]}"
            if [ -n "$json_entries" ]; then
                json_entries="${json_entries},"
            fi
            json_entries="${json_entries}\"${pci_id}\": \"${module_id}\""
            # Each module id is consumed by exactly one bus id.
            module_id=""
        fi
    done <<< "$gpu_mapping"

    mkdir -p -- "$(dirname -- "$mapping_file")" || return 1
    echo "{${json_entries}}" > "$mapping_file" || return 1
    echo "GPU mapping saved to $mapping_file"
}

# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
Expand Down Expand Up @@ -380,6 +462,7 @@ _load_driver() {
local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
local set_fw_path="true"
local fw_path_config_file="/sys/module/firmware_class/parameters/path"
local fmpm_socket_path="/run/nvidia-fabricmanager/fmpm.sock"
for param in "${NVIDIA_MODULE_PARAMS[@]}"; do
if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then
set_fw_path="false"
Expand Down Expand Up @@ -418,8 +501,12 @@ _load_driver() {
set +o xtrace -o nounset
fi

echo "Starting NVIDIA persistence daemon..."
nvidia-persistenced --persistence-mode
if [ "${FABRIC_MANAGER_FABRIC_MODE}" = "1" ]; then
echo "Skipping NVIDIA persistence daemon..."
else
echo "Starting NVIDIA persistence daemon..."
nvidia-persistenced --persistence-mode
fi

if [ "${DRIVER_TYPE}" = "vgpu" ]; then
echo "Copying gridd.conf..."
Expand All @@ -437,25 +524,9 @@ _load_driver() {
_start_vgpu_topology_daemon
fi

if _assert_nvlink5_system; then
_ensure_nvlink5_prerequisites || return 1
echo "Starting NVIDIA fabric manager daemon for NVLink5+..."

fm_config_file=/usr/share/nvidia/nvswitch/fabricmanager.cfg
fm_pid_file=/var/run/nvidia-fabricmanager/nv-fabricmanager.pid
nvlsm_config_file=/usr/share/nvidia/nvlsm/nvlsm.conf
nvlsm_pid_file=/var/run/nvidia-fabricmanager/nvlsm.pid
/usr/bin/nvidia-fabricmanager-start.sh --mode start \
--fm-config-file $fm_config_file \
--fm-pid-file $fm_pid_file \
--nvlsm-config-file $nvlsm_config_file \
--nvlsm-pid-file $nvlsm_pid_file
_setup_fabric_manager "${fmpm_socket_path}"

# If not a NVLink5+ switch, check for the presence of NVLink4 (or below) switches
elif _assert_nvswitch_system; then
echo "Starting NVIDIA fabric manager daemon..."
nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg
fi
_capture_gpu_mapping
}

# Stop persistenced and unload the kernel modules if they are currently loaded.
Expand Down