From 5dfa61ec826f68c81ed78a4cebfd1135477a7f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Fri, 15 Mar 2024 01:03:47 +0100 Subject: [PATCH] health: do not rely on the operational DS being valid I've seen the following crash on the real HW recently: #0 0x00000000b6c7979c _ZNK7libyang8DataNode6isTermEv (libyang-cpp.so + 0x1b79c) #1 0x00000000b6c797e8 _ZNK7libyang8DataNode6asTermEv (libyang-cpp.so + 0x1b7e8) #2 0x00000000004927ec operator(), std::optional >, sysrepo::Event, unsigned int> (veliad-health + 0xc7ec) #3 0x00000000b6da378c _ZNKSt8functionIFN7sysrepo9ErrorCodeENS0_7SessionEjSt17basic_string_viewIcSt11char_traitsIcEESt8optionalIS6_ENS0_5EventEjEEclES2_jS6_S8_S9_j (libsysrepo-cpp.so + 0x1378c) #4 0x00000000b6d1b5a0 sr_module_change_subscribe_enable (libsysrepo.so.7 + 0x135a0) #5 0x00000000b6da451c _ZN7sysrepo12Subscription14onModuleChangeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt8functionIFNS_9ErrorCodeENS_7SessionEjSt17basic_string_viewIcS4_ESt8optionalISD_ENS_5EventEjEERKSE_IS6_EjNS_16SubscribeOptionsE (libsysrepo-cpp.so + 0x1451c) #6 0x00000000b6d9ad04 _ZN7sysrepo7Session14onModuleChangeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt8functionIFNS_9ErrorCodeES0_jSt17basic_string_viewIcS4_ESt8optionalISC_ENS_5EventEjEERKSD_IS6_EjNS_16SubscribeOptionsES9_IFvRSt9exceptionEERKSD_INS_10FDHandlingEE (libsysrepo-cpp.so + 0xad04) #7 0x0000000000493990 _ZN5velia6health13AlarmsOutputsC2EN7sysrepo7SessionERKSt6vectorISt8functionIFvNS0_5StateEEESaIS8_EE (veliad-health + 0xd990) #8 0x000000000048e214 main (veliad-health + 0x8214) #9 0x00000000b683e8a8 n/a (libc.so.6 + 0x228a8) ELF object binary architecture: ARM ...which happened while I was playing with some service restarts. Since I do not have anything more complete, my working theory is that sysrepo-ietf-alarms was dead/dying at the time this code run, and once that happened, this callback was invoked with information about a removal of the alarm-summary data. That sounds like something which can happen, and once that happens, the original code would segfault because it dereferenced an empty std::optional. Fix that by wrapping stuff in an if. Change-Id: If9365fa47f09f1cc0e6cb37a2b149c7a587010c3 --- src/health/outputs/AlarmsOutputs.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/health/outputs/AlarmsOutputs.cpp b/src/health/outputs/AlarmsOutputs.cpp index b9c3a00d..758511d9 100644 --- a/src/health/outputs/AlarmsOutputs.cpp +++ b/src/health/outputs/AlarmsOutputs.cpp @@ -56,8 +56,14 @@ AlarmsOutputs::AlarmsOutputs(sysrepo::Session session, const std::vectorfindPath(alarmSummary + "/alarm-summary[severity='"s + severity + "']/not-cleared")->asTerm().valueStr())); + const auto node = data->findPath(alarmSummary + "/alarm-summary[severity='"s + severity + "']/not-cleared"); + if (!node) { + // we are not getting any data, surely that's bad + state = State::ERROR; + break; + } + const auto activeAlarms = std::stoi(std::string(node->asTerm().valueStr())); if (activeAlarms > 0) { state = errorState; break;