Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions dev-tools/omicron-dev/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,24 @@ omicron-rpaths.workspace = true
[dependencies]
anyhow.workspace = true
camino.workspace = true
chrono.workspace = true
clap.workspace = true
dropshot.workspace = true
futures.workspace = true
gateway-test-utils.workspace = true
illumos-utils.workspace = true
libc.workspace = true
nexus-config.workspace = true
nexus-test-interface.workspace = true
nexus-test-utils = { workspace = true, features = ["omicron-dev"] }
omicron-nexus.workspace = true
omicron-workspace-hack.workspace = true
omicron-sled-agent.workspace = true
oxide-tokio-rt.workspace = true
# See omicron-rpaths for more about the "pq-sys" dependency.
pq-sys = "*"
signal-hook-tokio.workspace = true
sled-agent-types.workspace = true
tokio.workspace = true
toml.workspace = true

Expand Down
24 changes: 23 additions & 1 deletion dev-tools/omicron-dev/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,18 @@ use libc::SIGINT;
use nexus_config::NexusConfig;
use nexus_test_interface::NexusServer;
use nexus_test_utils::resource_helpers::DiskTest;
use omicron_sled_agent::sim::ConfigHealthMonitor;
use signal_hook_tokio::Signals;
use std::fs;

/// Default path to the Nexus configuration file.
///
/// The path is assembled at compile time from this crate's
/// `CARGO_MANIFEST_DIR` and points at the example Nexus config. It is used
/// as the default value of the `nexus_config` command-line argument.
const DEFAULT_NEXUS_CONFIG: &str =
concat!(env!("CARGO_MANIFEST_DIR"), "/../../nexus/examples/config.toml");

/// Default path to the sled agent health monitor configuration file.
///
/// The path is assembled at compile time from this crate's
/// `CARGO_MANIFEST_DIR`. It is used as the default value of the
/// `health_monitor_config` command-line argument; the file it names is read
/// and parsed as TOML into a `ConfigHealthMonitor`.
const DEFAULT_HEALTH_MONITOR_CONFIG: &str = concat!(
env!("CARGO_MANIFEST_DIR"),
"/../../sled-agent/tests/configs/health_monitor_sim.toml"
);

fn main() -> anyhow::Result<()> {
oxide_tokio_rt::run(async {
let args = OmicronDevApp::parse();
Expand Down Expand Up @@ -57,6 +63,9 @@ struct RunAllArgs {
/// Override the nexus configuration file.
#[clap(long, default_value = DEFAULT_NEXUS_CONFIG)]
nexus_config: Utf8PathBuf,
/// Override the sled agent health monitor configuration file.
#[clap(long, default_value = DEFAULT_HEALTH_MONITOR_CONFIG)]
health_monitor_config: Utf8PathBuf,
}

impl RunAllArgs {
Expand Down Expand Up @@ -87,10 +96,23 @@ impl RunAllArgs {
.set_port(p);
}

let health_monitor_config_str =
fs::read_to_string(&self.health_monitor_config)?;
let sled_agent_health_monitor: ConfigHealthMonitor =
toml::from_str(&health_monitor_config_str).context(format!(
"parsing config: {}",
self.health_monitor_config.as_str()
))?;

println!("omicron-dev: setting up all services ... ");
let cptestctx = nexus_test_utils::omicron_dev_setup_with_config::<
omicron_nexus::Server,
>(&mut config, 0, self.gateway_config.clone())
>(
&mut config,
0,
self.gateway_config.clone(),
sled_agent_health_monitor,
)
.await
.context("error setting up services")?;

Expand Down
4 changes: 2 additions & 2 deletions illumos-utils/src/svcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ impl From<String> for SvcState {
#[serde(rename_all = "snake_case")]
/// Information about an SMF service that is enabled but not running
pub struct SvcInMaintenance {
fmri: String,
zone: String,
pub fmri: String,
pub zone: String,
}

impl SvcInMaintenance {
Expand Down
6 changes: 6 additions & 0 deletions nexus/inventory/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,12 @@ mod test {
None,
sim::ZpoolConfig::None,
SledCpuFamily::AmdMilan,
// The health monitor is disabled for now; this preference can be changed
// later if necessary.
sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
);

let agent =
Expand Down
19 changes: 15 additions & 4 deletions nexus/test-utils/src/nexus_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use crate::ControlPlaneStarter;
use crate::ControlPlaneTestContextSledAgent;
use crate::starter::PopulateCrdb;
use crate::starter::SledAgentOptions;
use crate::starter::setup_with_config_impl;
#[cfg(feature = "omicron-dev")]
use anyhow::Context;
Expand Down Expand Up @@ -85,9 +86,15 @@ impl<'a> ControlPlaneBuilder<'a> {
setup_with_config_impl(
starter,
PopulateCrdb::FromEnvironmentSeed,
sim::SimMode::Explicit,
SledAgentOptions {
sim_mode: sim::SimMode::Explicit,
extra_sled_agents: self.nextra_sled_agents,
sled_agent_health_monitor: sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
},
self.tls_cert,
self.nextra_sled_agents,
DEFAULT_SP_SIM_CONFIG.into(),
false,
)
Expand Down Expand Up @@ -361,6 +368,7 @@ pub async fn omicron_dev_setup_with_config<N: NexusServer>(
config: &mut NexusConfig,
extra_sled_agents: u16,
gateway_config_file: Utf8PathBuf,
sled_agent_health_monitor: sim::ConfigHealthMonitor,
) -> Result<ControlPlaneTestContext<N>> {
let starter = ControlPlaneStarter::<N>::new("omicron-dev", config);

Expand All @@ -383,9 +391,12 @@ pub async fn omicron_dev_setup_with_config<N: NexusServer>(
Ok(setup_with_config_impl(
starter,
PopulateCrdb::FromSeed { input_tar: seed_tar },
sim::SimMode::Auto,
SledAgentOptions {
sim_mode: sim::SimMode::Auto,
extra_sled_agents,
sled_agent_health_monitor,
},
None,
extra_sled_agents,
gateway_config_file,
true,
)
Expand Down
29 changes: 27 additions & 2 deletions nexus/test-utils/src/starter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
sled_id: SledUuid,
sled_index: u16,
sim_mode: sim::SimMode,
health_monitor: sim::ConfigHealthMonitor,
) {
let nexus_address =
self.nexus_internal_addr.expect("Must launch Nexus first");
Expand All @@ -896,6 +897,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
tempdir.path(),
sim_mode,
&self.simulated_upstairs,
health_monitor,
)
.await
.expect("Failed to start sled agent");
Expand Down Expand Up @@ -1000,6 +1002,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
sled_id: SledUuid,
sled_index: u16,
sim_mode: sim::SimMode,
health_monitor: sim::ConfigHealthMonitor,
) {
let nexus_address =
self.nexus_internal_addr.expect("Must launch Nexus first");
Expand All @@ -1016,6 +1019,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
tempdir.path(),
sim_mode,
&self.simulated_upstairs,
health_monitor,
)
.await
.expect("Failed to start sled agent");
Expand Down Expand Up @@ -1534,15 +1538,26 @@ impl RackInitRequestBuilder {
}
}

/// Sled-agent-related options consumed by `setup_with_config_impl`.
///
/// Groups the settings that apply to the simulated sled agents so they can
/// be passed to the setup routine as a single argument.
#[derive(Debug, Clone)]
pub(crate) struct SledAgentOptions {
/// Simulation mode passed to every sled agent that is started.
pub sim_mode: sim::SimMode,
/// Number of sled agents to start in addition to the first one.
pub extra_sled_agents: u16,
/// Health monitor configuration; a clone of it is handed to each sled
/// agent that is started.
pub sled_agent_health_monitor: sim::ConfigHealthMonitor,
}

pub(crate) async fn setup_with_config_impl<N: NexusServer>(
mut starter: ControlPlaneStarter<'_, N>,
populate: PopulateCrdb,
sim_mode: sim::SimMode,
sled_agent_opts: SledAgentOptions,
initial_cert: Option<Certificate>,
extra_sled_agents: u16,
gateway_config_file: Utf8PathBuf,
second_nexus: bool,
) -> ControlPlaneTestContext<N> {
let SledAgentOptions {
sim_mode,
extra_sled_agents,
sled_agent_health_monitor,
} = sled_agent_opts;
const STEP_TIMEOUT: Duration = Duration::from_secs(600);

// All setups will start with CRDB and clickhouse
Expand Down Expand Up @@ -1705,6 +1720,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
// The first and second sled agents have special UUIDs, and any extra ones
// after that are random.

let health_monitor = sled_agent_health_monitor.clone();
starter
.init_with_steps(
vec![(
Expand All @@ -1715,6 +1731,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
SLED_AGENT_UUID.parse().unwrap(),
0,
sim_mode,
health_monitor,
)
.boxed()
}),
Expand All @@ -1723,6 +1740,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
)
.await;

let health_monitor = sled_agent_health_monitor.clone();
if extra_sled_agents > 0 {
starter
.init_with_steps(
Expand All @@ -1734,6 +1752,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
SLED_AGENT2_UUID.parse().unwrap(),
1,
sim_mode,
health_monitor,
)
.boxed()
}),
Expand All @@ -1743,7 +1762,9 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
.await;
}

let health_monitor = sled_agent_health_monitor.clone();
for index in 1..extra_sled_agents {
let health_monitor = health_monitor.clone();
starter
.init_with_steps(
vec![(
Expand All @@ -1754,6 +1775,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
SledUuid::new_v4(),
index.checked_add(1).unwrap(),
sim_mode,
health_monitor.clone(),
)
.boxed()
}),
Expand Down Expand Up @@ -1839,6 +1861,7 @@ pub(crate) enum PopulateCrdb {
///
/// Note: you should probably use the `extra_sled_agents` macro parameter on
/// `nexus_test` instead!
#[allow(clippy::too_many_arguments)]
pub async fn start_sled_agent(
log: Logger,
nexus_address: SocketAddr,
Expand All @@ -1847,6 +1870,7 @@ pub async fn start_sled_agent(
update_directory: &Utf8Path,
sim_mode: sim::SimMode,
simulated_upstairs: &Arc<sim::SimulatedUpstairs>,
health_monitor: sim::ConfigHealthMonitor,
) -> Result<sim::Server, String> {
// Generate a baseboard serial number that matches the SP configuration
// (SimGimlet00, SimGimlet01, etc.) so that inventory can link sled agents
Expand All @@ -1861,6 +1885,7 @@ pub async fn start_sled_agent(
sim::ZpoolConfig::None,
SledCpuFamily::AmdMilan,
Some(baseboard_serial),
health_monitor,
);
start_sled_agent_with_config(log, &config, sled_index, simulated_upstairs)
.await
Expand Down
16 changes: 16 additions & 0 deletions nexus/tests/integration_tests/instances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,10 @@ async fn test_instance_migration_compatible_cpu_platforms(
Some(&camino::Utf8Path::new("/an/unused/update/directory")),
omicron_sled_agent::sim::ZpoolConfig::None,
sled_agent_types::inventory::SledCpuFamily::AmdTurin,
omicron_sled_agent::sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
);
let new_sled_id = config.id;

Expand Down Expand Up @@ -1349,6 +1353,10 @@ async fn test_instance_migration_incompatible_cpu_platforms(
Some(&camino::Utf8Path::new("/an/unused/update/directory")),
omicron_sled_agent::sim::ZpoolConfig::None,
sled_agent_types::inventory::SledCpuFamily::AmdTurin,
omicron_sled_agent::sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
);
let turin_sled_id = config.id;

Expand Down Expand Up @@ -1426,6 +1434,10 @@ async fn test_instance_migration_unknown_sled_type(
Some(&camino::Utf8Path::new("/an/unused/update/directory")),
omicron_sled_agent::sim::ZpoolConfig::None,
sled_agent_types::inventory::SledCpuFamily::Unknown,
omicron_sled_agent::sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
);
let new_sled_id = config.id;

Expand Down Expand Up @@ -7125,6 +7137,10 @@ async fn test_can_start_instance_with_cpu_platform(
Some(&camino::Utf8Path::new("/an/unused/update/directory")),
omicron_sled_agent::sim::ZpoolConfig::None,
sled_agent_types::inventory::SledCpuFamily::AmdTurin,
omicron_sled_agent::sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
);
let new_sled_id = config.id;

Expand Down
4 changes: 4 additions & 0 deletions nexus/tests/integration_tests/sleds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) {
&update_directory,
sim::SimMode::Explicit,
&cptestctx.first_sled_agent().simulated_upstairs,
sim::ConfigHealthMonitor {
enabled: false,
sim_health_checks: None,
},
)
.await
.unwrap(),
Expand Down
1 change: 1 addition & 0 deletions sled-agent/health-monitor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ workspace = true
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
chrono.workspace = true
derive_more.workspace = true
dropshot.workspace = true
futures.workspace = true
Expand Down
Loading
Loading