From e72eaaf223abd428b13fdaeaa612e94fa60a1ca4 Mon Sep 17 00:00:00 2001 From: Fredi Raspall Date: Wed, 21 Jan 2026 10:24:21 +0100 Subject: [PATCH 1/3] Fix warn unhandled cases in switch Signed-off-by: Fredi Raspall --- src/hh_dp_process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hh_dp_process.c b/src/hh_dp_process.c index 4892602..2236ce6 100644 --- a/src/hh_dp_process.c +++ b/src/hh_dp_process.c @@ -271,6 +271,8 @@ static hh_dp_res_t hh_process(struct zebra_dplane_ctx *ctx) case DPLANE_OP_TC_FILTER_ADD: case DPLANE_OP_TC_FILTER_DELETE: case DPLANE_OP_TC_FILTER_UPDATE: + case DPLANE_OP_VLAN_INSTALL: + case DPLANE_OP_PROVIDER_REFRESH: return HH_IGNORED; /* Startup Control */ From 61949ae3649fb97b4a4276bb4f1249a5fefe0b60 Mon Sep 17 00:00:00 2001 From: Fredi Raspall Date: Fri, 23 Jan 2026 15:47:04 +0100 Subject: [PATCH 2/3] Fix: fix termination logic * don't cancel events on termination * Zebra dplane may call plugin termination functions more than once. Fix the methods so that they can be called multiple times without creating double-frees or use-after-free conditions. * Add methods to get list counts and free dplane ctx when shutting down. * Handle the fact that while terminating, zebra may attempt to delete routes. When zebra shuts down, the plugin will not delete any state in dataplane. This is intended as the expectation is that a new instance will be promptly spawn and dataplane will then handle any stale routes left by the leaving zebra instance after a while. If this behavior is not wanted, the solution is to let the plugin issue all requests without expecting (caching) any request, unless we want the plugin to block zebra shutdown for all requests to be answered. Similarly, when finalizing, the plugin answers with a failure to zebra. This is so that it believes that all deletions failed, preventing it from removing routes (*) from the kernel VRF. This is so that, should zebra fail but not other daemons (BGP), those can still communicate using the kernel routing tables. Signed-off-by: Fredi Raspall --- src/hh_dp_comm.c | 17 ++++++++--------- src/hh_dp_comm.h | 1 + src/hh_dp_msg.c | 19 +++++++++++++------ src/hh_dp_msg.h | 2 ++ src/hh_dp_msg_cache.c | 38 ++++++++++++++++++++++++++++++++++---- src/hh_dp_msg_cache.h | 4 +++- src/hh_dp_plugin.c | 20 ++++++++++++++++---- 7 files changed, 77 insertions(+), 24 deletions(-) diff --git a/src/hh_dp_comm.c b/src/hh_dp_comm.c index ae9a134..227148a 100644 --- a/src/hh_dp_comm.c +++ b/src/hh_dp_comm.c @@ -58,6 +58,7 @@ static uint64_t synt = 0; /* global */ struct fmt_buff *fb = NULL; bool log_dataplane_msg = true; +bool finalizing = false; /* tell if a unix path is valid */ static bool is_valid_unix_path(const char *path) @@ -365,6 +366,7 @@ static void wakeon_dp_write_avail(void) /* Main function to send an RPC message. The dp_msg is not sent straight but queued in the * unsent queue (to preserve ordering) and any prior pending messages are sent first, by * calling send_pending_rpc_msgs(). Connect requests overtake any pending messsages. + * This function takes ownership of dp_msg and is responsible for recycling it or queueing it. */ int send_rpc_msg(struct dp_msg *dp_msg) { @@ -499,17 +501,15 @@ void fini_dplane_rpc(void) /* close Unix sock */ dp_unix_sock_close(); - /* politeness */ - event_cancel(&ev_recv); - event_cancel(&ev_send); - event_cancel(&ev_connect_timer); - event_cancel(&ev_keepalive); - /* destroy rx / tx buffers */ - if (tx_buff) + if (tx_buff) { buff_free(tx_buff); - if (rx_buff) + tx_buff = NULL; + } + if (rx_buff) { buff_free(rx_buff); + rx_buff = NULL; + } /* finalize message cache */ fini_dp_msg_cache(); @@ -519,7 +519,6 @@ void fini_dplane_rpc(void) fini_fmt_buff(fb); fb = NULL; } - } /* allocate Tx/Rx buffers for RPC encoding/decoding */ diff --git a/src/hh_dp_comm.h b/src/hh_dp_comm.h index 10c1756..374f89c 100644 --- a/src/hh_dp_comm.h +++ b/src/hh_dp_comm.h @@ -8,6 +8,7 @@ #include "hh_dp_msg_cache.h" extern bool log_dataplane_msg; +extern bool finalizing; /* set dp unix sock local path */ int set_dp_sock_local_path(const char *path); diff --git a/src/hh_dp_msg.c b/src/hh_dp_msg.c index f876afe..e2700bc 100644 --- a/src/hh_dp_msg.c +++ b/src/hh_dp_msg.c @@ -299,6 +299,18 @@ static void handle_rpc_connect_response(struct RpcResponse *resp, bool purged) abort(); } } + +void dp_msg_hand_off(struct dp_msg *dp_msg, enum zebra_dplane_result result) { + + /* set result */ + dplane_ctx_set_status(dp_msg->ctx, result); + + /* queue back to zebra */ + dplane_provider_enqueue_out_ctx(prov_p, dp_msg->ctx); + dplane_provider_work_ready(); + dp_msg->ctx = NULL; /* imposed to allow recycle */ +} + static void handle_rpc_ctx_response(struct dp_msg *m, RpcResultCode rescode) { BUG(!prov_p); @@ -307,12 +319,7 @@ static void handle_rpc_ctx_response(struct dp_msg *m, RpcResultCode rescode) if (m->ctx) { /* set the result - we treat ignored requests as successes for the time being */ enum zebra_dplane_result result = (rescode == Ok || rescode == Ignored) ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; - dplane_ctx_set_status(m->ctx, result); - - /* queue back to zebra */ - dplane_provider_enqueue_out_ctx(prov_p, m->ctx); - dplane_provider_work_ready(); - m->ctx = NULL; /* imposed to allow recycle */ + dp_msg_hand_off(m, result); } } static void do_handle_rpc_response(struct RpcResponse *resp, struct dp_msg *m, bool purged) { diff --git a/src/hh_dp_msg.h b/src/hh_dp_msg.h index e27e374..6e74982 100644 --- a/src/hh_dp_msg.h +++ b/src/hh_dp_msg.h @@ -16,4 +16,6 @@ int send_rpc_control(uint8_t refresh); /* Entry point for RPC msg processing */ void handle_rpc_msg(struct RpcMsg *msg); +void dp_msg_hand_off(struct dp_msg *dp_msg, enum zebra_dplane_result result); + #endif /* SRC_HH_DP_MSG_H_ */ diff --git a/src/hh_dp_msg_cache.c b/src/hh_dp_msg_cache.c index 70ed5f9..57fdabd 100644 --- a/src/hh_dp_msg_cache.c +++ b/src/hh_dp_msg_cache.c @@ -4,6 +4,7 @@ #include "lib/libfrr.h" #include "hh_dp_internal.h" #include "hh_dp_msg_cache.h" +#include "zebra/zebra_dplane.h" /* dplane_ctx_reset() */ DEFINE_MTYPE(ZEBRA, HH_DP_MSG, "HH Dataplane msg"); @@ -18,11 +19,23 @@ struct dp_msg_cache { static void dp_msg_del(struct dp_msg *msg) { BUG(!msg); + if (msg->ctx) { + zlog_warn("Deleting dp_msg referring to a ctx..."); + /* dplane_ctx_free is private, hence we cannot free the context. This is only + * called on shutdown, so all good */ #if 0 - /* dplane_ctx_free is private, hence we cannot free the context */ - if (msg->ctx) dplane_ctx_free(&msg->ctx); +#else + /* Workaround that avoids needing to patch frr: + * call dplane_ctx_reset() and free ourselves. + * XFREE wraps free(). The danger here is that the memory tracking will + * not be aware of this free. But that's fine since we do that on termination. + */ + dplane_ctx_reset(msg->ctx); + free(msg->ctx); + msg->ctx = NULL; #endif + } XFREE(MTYPE_HH_DP_MSG, msg); } @@ -34,7 +47,6 @@ static void empty_dp_msg_list(struct dp_msg_list_head *list, const char *name) zlog_debug("Emptying cache list '%s' (%zu messages)", name, dp_msg_list_count(list)); while((msg = dp_msg_list_pop(list)) != NULL) dp_msg_del(msg); - dp_msg_list_fini(list); } /* allocate a dp_msg */ @@ -53,6 +65,13 @@ struct dp_msg *dp_msg_new(void) return m; } +void log_dp_msg_lists(void) { + zlog_debug("pool: %zu unsent: %zu in-flight: %zu", + dp_msg_pool_count(), + dp_msg_unsent_count(), + dp_msg_in_flight_count()); +} + /* recycle a dp_msg for later reuse */ void dp_msg_recycle(struct dp_msg *msg) { @@ -88,10 +107,21 @@ size_t dp_msg_unsent_count(void) { return dp_msg_list_count(&msg_cache.unsent); } +/* length of pool list */ +size_t dp_msg_pool_count(void) { + return dp_msg_list_count(&msg_cache.pool); +} + +/* length of inflight list */ +size_t dp_msg_in_flight_count(void) { + return dp_msg_list_count(&msg_cache.in_flight); +} + /* Cache a message (e.g. a Request) until we get the corresponding response */ void dp_msg_cache_inflight(struct dp_msg *msg) { BUG(!msg); + assert(msg->msg.type == Request); dp_msg_list_add_tail(&msg_cache.in_flight, msg); } @@ -118,7 +148,7 @@ int init_dp_msg_cache(void) if (msg) dp_msg_list_add_tail(&msg_cache.pool, msg); } - zlog_debug("Initialized cache with pool of %zu messages", dp_msg_list_count(&msg_cache.pool)); + zlog_debug("Initialized cache with pool of %zu messages", dp_msg_pool_count()); return 0; } diff --git a/src/hh_dp_msg_cache.h b/src/hh_dp_msg_cache.h index 6ebdc55..65c59e1 100644 --- a/src/hh_dp_msg_cache.h +++ b/src/hh_dp_msg_cache.h @@ -41,8 +41,10 @@ void dp_msg_unsent_push_back(struct dp_msg *msg); struct dp_msg *dp_msg_pop_unsent(void); -/* length of unsent list */ +/* length of lists */ +size_t dp_msg_pool_count(void); size_t dp_msg_unsent_count(void); +size_t dp_msg_in_flight_count(void); /* put message in list of sent messages */ void dp_msg_cache_inflight(struct dp_msg *msg); diff --git a/src/hh_dp_plugin.c b/src/hh_dp_plugin.c index dedf5d3..c8288f0 100644 --- a/src/hh_dp_plugin.c +++ b/src/hh_dp_plugin.c @@ -78,9 +78,12 @@ static int zd_hh_start(struct zebra_dplane_provider *prov) */ static int zd_hh_fini(struct zebra_dplane_provider *prov, bool early) { - zlog_info("%s: Finalizing...", dplane_provider_get_name(prov)); - - fini_dplane_rpc(); + if (early) { + zlog_info("%s: Finalizing...", dplane_provider_get_name(prov)); + finalizing = true; + } else { + fini_dplane_rpc(); + } return 0; } @@ -104,8 +107,17 @@ static int zd_hh_process(struct zebra_dplane_provider *prov) if (!ctx) break; - zd_hh_process_update(prov, ctx); + if (!finalizing) { + zd_hh_process_update(prov, ctx); + } else { + // tell zebra deletions failed + dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_FAILURE); + dplane_provider_enqueue_out_ctx(prov, ctx); + } } + + if (finalizing) + dplane_provider_work_ready(); return 0; } From b3a48ea26c0bac514aeb589673126ca3356634ba Mon Sep 17 00:00:00 2001 From: Fredi Raspall Date: Mon, 26 Jan 2026 13:30:53 +0100 Subject: [PATCH 3/3] Fix some error conditions Signed-off-by: Fredi Raspall --- src/CMakeLists.txt | 4 ++-- src/hh_dp_comm.c | 16 +++++++++++++--- src/hh_dp_internal.h | 1 - src/hh_dp_msg.c | 2 -- src/hh_dp_plugin.c | 1 - src/hh_dp_process.c | 1 - 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 511965c..fe46439 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,10 +64,10 @@ set(VERBOSE false) set(CMAKE_VERBOSE_MAKEFILE true) if(CMAKE_BUILD_TYPE STREQUAL Debug) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-declarations") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-declarations -Wno-error=pedantic") set(DEBUG_BUILD true) else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-declarations") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-declarations -Wno-error=pedantic") set(DEBUG_BUILD false) endif() diff --git a/src/hh_dp_comm.c b/src/hh_dp_comm.c index 227148a..3ed9851 100644 --- a/src/hh_dp_comm.c +++ b/src/hh_dp_comm.c @@ -157,7 +157,11 @@ static int dp_unix_sock_open(const char *bind_path) /* build bind address */ struct sockaddr_un un_src = {0}; size_t path_len = strlen(bind_path); - BUG(!path_len || path_len >= sizeof(un_src.sun_path), -1); + if (!path_len || path_len >= sizeof(un_src.sun_path)) { + zlog_err("Invalid path len %zu", path_len); + goto fail; + } + un_src.sun_family = AF_UNIX; strncpy(un_src.sun_path, bind_path, sizeof(un_src.sun_path)); @@ -557,11 +561,11 @@ int init_dplane_rpc(void) /* open unix socket and bind it */ dp_sock = dp_unix_sock_open(plugin_sock_path); if (dp_sock == NO_SOCK) - return -1; + goto fail; /* allocate Tx/Rx buffers for RPC encoding/decoding */ if (init_rpc_buffers() != 0) - return -1; + goto fail; /* initialize msg cache */ init_dp_msg_cache(); @@ -577,4 +581,10 @@ int init_dplane_rpc(void) /* success */ return 0; + +fail: + dp_unix_sock_close(); + fini_fmt_buff(fb); + fb = NULL; + return -1; } diff --git a/src/hh_dp_internal.h b/src/hh_dp_internal.h index c854198..f5673ff 100644 --- a/src/hh_dp_internal.h +++ b/src/hh_dp_internal.h @@ -3,7 +3,6 @@ #ifndef SRC_HH_DP_INTERNAL_H_ #define SRC_HH_DP_INTERNAL_H_ -#include "lib/assert/assert.h" #include "lib/zlog.h" #include "hh_dp_config.h" diff --git a/src/hh_dp_msg.c b/src/hh_dp_msg.c index e2700bc..1a83d09 100644 --- a/src/hh_dp_msg.c +++ b/src/hh_dp_msg.c @@ -10,9 +10,7 @@ #include "zebra/zebra_dplane.h" #include "lib/libfrr.h" -#include "lib/assert/assert.h" #include "zebra/debug.h" - #include "hh_dp_internal.h" #include "hh_dp_comm.h" /* send_rpc_msg() && dplane_is_ready() */ #include "hh_dp_process.h" /* struct zebra_dplane_provider */ diff --git a/src/hh_dp_plugin.c b/src/hh_dp_plugin.c index c8288f0..83b950a 100644 --- a/src/hh_dp_plugin.c +++ b/src/hh_dp_plugin.c @@ -3,7 +3,6 @@ #include "config.h" /* FRR config: must include */ #include "lib/zebra.h" #include "lib/libfrr.h" -#include "lib/assert/assert.h" #include "zebra/zebra_dplane.h" #include "zebra/debug.h" diff --git a/src/hh_dp_process.c b/src/hh_dp_process.c index 2236ce6..f8d0a31 100644 --- a/src/hh_dp_process.c +++ b/src/hh_dp_process.c @@ -3,7 +3,6 @@ #include "config.h" /* Include this explicitly */ #include "lib/zebra.h" #include "lib/libfrr.h" -#include "lib/assert/assert.h" #include "zebra/zebra_dplane.h" #include "zebra/debug.h"