Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync dependencies to Redis #10290

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
12c4711
IcingaDB: Start keeping track of Host/Service to Dependency relationship
oxzi Oct 21, 2024
dae5e11
Checkable: Introduce `GetAllChildrenCount()` method
yhabteab Dec 2, 2024
9418634
IcingaDB: Add `affected_children` to `Host/Service` Redis updates
yhabteab Dec 2, 2024
d4699d1
IcingaDB: Sync `affects_children` as part of runtime state updates
yhabteab Dec 2, 2024
dff9be0
Dependency: Don't allow to change `redundancy_group` at runtime
yhabteab Dec 5, 2024
04815ab
Introduce `DependencyGroup` helper class
yhabteab Dec 5, 2024
132effd
DependencyGroup: Add a global registry & deduplication logic
yhabteab Dec 12, 2024
db5cc76
Add `DependencyGroup::GetState()` helper method
yhabteab Dec 5, 2024
cab3a1a
Checkable: Store dependencies grouped by their redundancy group
yhabteab Dec 4, 2024
d636471
IcingaDB: Dump checkables dependencies config to redis correctly
yhabteab Dec 4, 2024
88d818c
IcingaDB: Sync dependencies states to Redis
yhabteab Dec 4, 2024
a830eba
IcingaDB: Sync dependencies initial states on config dump
yhabteab Dec 5, 2024
dae5f6f
IcingaDB: Handle runtime removed dependencies correctly
yhabteab Dec 9, 2024
6893325
Checkable: Drop unused `failedDependency` argument from `IsReachable()`
yhabteab Dec 12, 2024
4618464
Checkable: Use redundancy groups state in `IsReachable`
yhabteab Dec 16, 2024
e6327cc
tests: Add unittests for the redundancy groups registry
yhabteab Dec 16, 2024
941fa57
IcingaDB: Bump expected redis version to `6`
yhabteab Dec 16, 2024
8ae1fb3
Activate `Dependency` objects before their parent objects
yhabteab Jan 10, 2025
b8c0a72
tests: Add the new unittests to the `CMakefile.txt`
yhabteab Jan 13, 2025
17ba7c9
IcingaDB: Send reachablity state updates for all children recursively
yhabteab Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 86 additions & 52 deletions lib/icinga/checkable-dependency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,40 @@

using namespace icinga;

void Checkable::AddDependency(const Dependency::Ptr& dep)
void Checkable::AddDependencyGroup(const DependencyGroup::Ptr& dependencyGroup)
{
std::unique_lock<std::mutex> lock(m_DependencyMutex);
m_Dependencies.insert(dep);
m_DependencyGroups.insert(dependencyGroup);
}

void Checkable::RemoveDependency(const Dependency::Ptr& dep)
void Checkable::RemoveDependencyGroup(const DependencyGroup::Ptr& dependencyGroup)
{
std::unique_lock<std::mutex> lock(m_DependencyMutex);
m_Dependencies.erase(dep);
m_DependencyGroups.erase(dependencyGroup);
}

std::vector<DependencyGroup::Ptr> Checkable::GetDependencyGroups() const
{
std::unique_lock lock(m_DependencyMutex);
return {m_DependencyGroups.begin(), m_DependencyGroups.end()};
}

std::vector<Dependency::Ptr> Checkable::GetDependencies() const
{
std::unique_lock<std::mutex> lock(m_DependencyMutex);
return std::vector<Dependency::Ptr>(m_Dependencies.begin(), m_Dependencies.end());
std::vector<Dependency::Ptr> dependencies;
for (const auto& dependencyGroup : m_DependencyGroups) {
auto members(dependencyGroup->GetMembers(this));
dependencies.insert(dependencies.end(), members.begin(), members.end());
}

return dependencies;
}

/**
 * Checks whether this Checkable has any dependency registered in either direction.
 *
 * Both containers are inspected while m_DependencyMutex is held.
 *
 * @return true if m_DependencyGroups or m_ReverseDependencies is non-empty, false otherwise.
 */
bool Checkable::HasAnyDependencies() const
{
	std::unique_lock<std::mutex> guard(m_DependencyMutex);

	if (!m_DependencyGroups.empty()) {
		return true;
	}

	return !m_ReverseDependencies.empty();
}

void Checkable::AddReverseDependency(const Dependency::Ptr& dep)
Expand All @@ -43,7 +61,7 @@ std::vector<Dependency::Ptr> Checkable::GetReverseDependencies() const
return std::vector<Dependency::Ptr>(m_ReverseDependencies.begin(), m_ReverseDependencies.end());
}

bool Checkable::IsReachable(DependencyType dt, Dependency::Ptr *failedDependency, int rstack) const
bool Checkable::IsReachable(DependencyType dt, int rstack) const
{
/* Anything greater than 256 causes recursion bus errors. */
int limit = 256;
Expand All @@ -55,66 +73,53 @@ bool Checkable::IsReachable(DependencyType dt, Dependency::Ptr *failedDependency
return false;
}

for (const Checkable::Ptr& checkable : GetParents()) {
if (!checkable->IsReachable(dt, failedDependency, rstack + 1))
return false;
}

/* implicit dependency on host if this is a service */
const auto *service = dynamic_cast<const Service *>(this);
if (service && (dt == DependencyState || dt == DependencyNotification)) {
Host::Ptr host = service->GetHost();

if (host && host->GetState() != HostUp && host->GetStateType() == StateTypeHard) {
if (failedDependency)
*failedDependency = nullptr;

return false;
}
}

auto deps = GetDependencies();

std::unordered_map<std::string, Dependency::Ptr> violated; // key: redundancy group, value: nullptr if satisfied, violating dependency otherwise

for (const Dependency::Ptr& dep : deps) {
std::string redundancy_group = dep->GetRedundancyGroup();

if (!dep->IsAvailable(dt)) {
if (redundancy_group.empty()) {
for (auto& dependencyGroup : GetDependencyGroups()) {
if (!(dependencyGroup->GetState(dt, rstack + 1) & DependencyGroup::State::ReachableOK)) {
if (dependencyGroup->IsRedundancyGroup()) { // For non-redundant groups, this should already be logged.
Log(LogDebug, "Checkable")
<< "Non-redundant dependency '" << dep->GetName() << "' failed for checkable '" << GetName() << "': Marking as unreachable.";

if (failedDependency)
*failedDependency = dep;

return false;
<< "All dependencies in redundancy group '" << dependencyGroup->GetName() << "' have failed for checkable '"
<< GetName() << "': Marking as unreachable.";
}

// tentatively mark this dependency group as failed unless it is already marked;
// so it either passed before (don't overwrite) or already failed (so don't care)
// note that std::unordered_map::insert() will not overwrite an existing entry
violated.insert(std::make_pair(redundancy_group, dep));
} else if (!redundancy_group.empty()) {
violated[redundancy_group] = nullptr;
return false;
}
}

auto violator = std::find_if(violated.begin(), violated.end(), [](auto& v) { return v.second != nullptr; });
if (violator != violated.end()) {
Log(LogDebug, "Checkable")
<< "All dependencies in redundancy group '" << violator->first << "' have failed for checkable '" << GetName() << "': Marking as unreachable.";

if (failedDependency)
*failedDependency = violator->second;
return true;
}

/**
* Checks whether the last check result of this Checkable affects its child dependencies.
*
* @return bool - Returns true if the Checkable affects its child dependencies, otherwise false.
*/
bool Checkable::AffectsChildren() const
{
auto cr(GetLastCheckResult());
if (!cr || IsStateOK(cr->GetState()) || !IsReachable()) {
// If there is no check result, the state is OK, or the Checkable is not reachable, we can't
// safely determine whether the Checkable affects its child dependencies.
return false;
}

if (failedDependency)
*failedDependency = nullptr;
for (auto& dep: GetReverseDependencies()) {
if (!dep->IsAvailable(DependencyState)) {
// If one of the child dependencies is not available, then it's definitely due to the
// current Checkable state, so we don't need to verify the remaining ones.
return true;
}
}

return true;
return false;
}

std::set<Checkable::Ptr> Checkable::GetParents() const
Expand Down Expand Up @@ -145,6 +150,21 @@ std::set<Checkable::Ptr> Checkable::GetChildren() const
return parents;
}

/**
* Retrieve the total number of all the children of the current Checkable.
*
* Note, due to the max recursion limit of 256, the returned number may not reflect
* the actual total number of children involved in the dependency chain.
*
* @return int - Returns the total number of all the children of the current Checkable.
*/
size_t Checkable::GetAllChildrenCount() const
{
std::set<Checkable::Ptr> children(GetChildren());
GetAllChildrenInternal(children, 0);
return children.size();
}

std::set<Checkable::Ptr> Checkable::GetAllChildren() const
{
std::set<Checkable::Ptr> children = GetChildren();
Expand All @@ -154,22 +174,36 @@ std::set<Checkable::Ptr> Checkable::GetAllChildren() const
return children;
}

/**
* Retrieve all direct and indirect children of the current Checkable.
*
* Note, this function performs a recursive call chain traversing all the children of the current Checkable
* up to a certain limit (256). When that limit is reached, it will log a warning message and abort the operation.
*
* @param children - The set of children to be filled with all the children of the current Checkable.
* @param level - The current level of recursion.
*/
void Checkable::GetAllChildrenInternal(std::set<Checkable::Ptr>& children, int level) const
{
if (level > 32)
return;
// The previous limit (32) doesn't seem to make sense, and appears to be some random number.
// So, this limit is set to 256 to match the limit in IsReachable().
if (level > 256) {
Log(LogWarning, "Checkable")
<< "Too many nested dependencies (>" << 256 << ") for checkable '" << GetName() << "': aborting traversal.";
return ;
}

std::set<Checkable::Ptr> localChildren;

for (const Checkable::Ptr& checkable : children) {
std::set<Checkable::Ptr> cChildren = checkable->GetChildren();

if (!cChildren.empty()) {
if (auto cChildren(checkable->GetChildren()); !cChildren.empty()) {
GetAllChildrenInternal(cChildren, level + 1);
localChildren.insert(cChildren.begin(), cChildren.end());
}

localChildren.insert(checkable);
if (level != 0) { // Recursion level 0 is the initiator, so checkable is already in the set.
localChildren.insert(checkable);
}
}

children.insert(localChildren.begin(), localChildren.end());
Expand Down
13 changes: 9 additions & 4 deletions lib/icinga/checkable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ enum FlappingStateFilter
class CheckCommand;
class EventCommand;
class Dependency;
class DependencyGroup;

/**
* An Icinga service.
Expand All @@ -77,10 +78,12 @@ class Checkable : public ObjectImpl<Checkable>
std::set<Checkable::Ptr> GetParents() const;
std::set<Checkable::Ptr> GetChildren() const;
std::set<Checkable::Ptr> GetAllChildren() const;
size_t GetAllChildrenCount() const;

void AddGroup(const String& name);

bool IsReachable(DependencyType dt = DependencyState, intrusive_ptr<Dependency> *failedDependency = nullptr, int rstack = 0) const;
bool IsReachable(DependencyType dt = DependencyState, int rstack = 0) const;
bool AffectsChildren() const;

AcknowledgementType GetAcknowledgement();

Expand Down Expand Up @@ -182,9 +185,11 @@ class Checkable : public ObjectImpl<Checkable>
bool IsFlapping() const;

/* Dependencies */
void AddDependency(const intrusive_ptr<Dependency>& dep);
void RemoveDependency(const intrusive_ptr<Dependency>& dep);
void AddDependencyGroup(const intrusive_ptr<DependencyGroup>& dependencyGroup);
void RemoveDependencyGroup(const intrusive_ptr<DependencyGroup>& dependencyGroup);
std::vector<intrusive_ptr<DependencyGroup>> GetDependencyGroups() const;
std::vector<intrusive_ptr<Dependency> > GetDependencies() const;
bool HasAnyDependencies() const;

void AddReverseDependency(const intrusive_ptr<Dependency>& dep);
void RemoveReverseDependency(const intrusive_ptr<Dependency>& dep);
Expand Down Expand Up @@ -244,7 +249,7 @@ class Checkable : public ObjectImpl<Checkable>

/* Dependencies */
mutable std::mutex m_DependencyMutex;
std::set<intrusive_ptr<Dependency> > m_Dependencies;
std::set<intrusive_ptr<DependencyGroup>> m_DependencyGroups;
std::set<intrusive_ptr<Dependency> > m_ReverseDependencies;

void GetAllChildrenInternal(std::set<Checkable::Ptr>& children, int level = 0) const;
Expand Down
Loading
Loading