From cdea272202c7f49828e431d755659c67958b4a9f Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 22 Jan 2026 14:56:51 +0000 Subject: [PATCH] dstack-util: notify host on boot error Wrap the system setup to catch errors and notify the VMM via host_api.notify_q("boot.error") before returning. Also adds context to the request_app_keys error for better diagnostics. --- dstack-util/src/system_setup.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/dstack-util/src/system_setup.rs b/dstack-util/src/system_setup.rs index 1bcf849f..ae88cb74 100644 --- a/dstack-util/src/system_setup.rs +++ b/dstack-util/src/system_setup.rs @@ -511,6 +511,15 @@ fn emit_key_provider_info(provider_info: &KeyProviderInfo) -> Result<()> { pub async fn cmd_sys_setup(args: SetupArgs) -> Result<()> { let stage0 = Stage0::load(&args)?; + let vmm = stage0.host_api(); + let result = do_sys_setup(stage0).await; + if let Err(err) = &result { + vmm.notify_q("boot.error", &format!("{err:#}")).await; + } + result +} + +async fn do_sys_setup(stage0: Stage0<'_>) -> Result<()> { if stage0.shared.app_compose.secure_time { info!("Waiting for the system time to be synchronized"); cmd! { @@ -585,6 +594,12 @@ struct Stage1<'a> { } impl<'a> Stage0<'a> { + fn host_api(&self) -> HostApi { + HostApi::new( + self.shared.sys_config.host_api_url.clone(), + self.shared.sys_config.pccs_url.clone(), + ) + } fn load(args: &'a SetupArgs) -> Result { let host_shared_copy_dir = args.work_dir.join(HOST_SHARED_DIR_NAME); let host_shared = HostShared::copy("/tmp/.host-shared".as_ref(), &host_shared_copy_dir)?; @@ -1155,7 +1170,10 @@ impl<'a> Stage0<'a> { self.vmm .notify_q("boot.progress", "requesting app keys") .await; - let app_keys = self.request_app_keys().await?; + let app_keys = self + .request_app_keys() + .await + .context("Failed to request app keys")?; if app_keys.disk_crypt_key.is_empty() { bail!("Failed to get valid key phrase from KMS"); }