Skip to content

Commit

Permalink
Add a record to keep link change (their status and timestamp) in Adja…
Browse files Browse the repository at this point in the history
…centDB.

Summary:
We are trying to understand how long it takes for an up/down link event to be propagated along the network.

It's important to measure this because EBB LspAgent reacts to link changes. For example, if LspAgent sees a down link, it will switch to a new path to avoid traffic loss.

A timestamp of an up/down link event is kept in LinkEvents field in AdjacencyDB (similarly as we do for "PerfEvents") when Netlink in OpenR's LinkMonitor reports link event. You can ask why we don't use "PerfEvents"? It's because PerfEvents' data structure consists of string, which is not suitable for storing link event information.

LinkEvents list is quite small because in EBB we use aggregate ports. Typically a router has less than 20 port channels.

Reviewed By: xiangxu1121

Differential Revision: D53256463

fbshipit-source-id: 9d516985043dcd07093376a86f6a9a2c353a04cc
  • Loading branch information
ngocdmsj authored and facebook-github-bot committed Feb 17, 2024
1 parent 33ec129 commit ccb86a5
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 1 deletion.
6 changes: 6 additions & 0 deletions openr/common/LsdbTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <openr/common/Constants.h>
#include <openr/common/NetworkUtil.h>
#include <openr/common/Types.h>
#include <openr/common/Util.h>
#include <openr/if/gen-cpp2/Network_types.h>
#include <openr/if/gen-cpp2/Types_types.h>
#include <openr/policy/PolicyStructs.h>
Expand Down Expand Up @@ -352,6 +353,11 @@ struct InterfaceInfo {
*/
bool isUp{false};

/**
* Unix timestamp at when link status changed.
*/
int64_t statusChangeTimestamp{0};

/**
* Interface index
*/
Expand Down
6 changes: 6 additions & 0 deletions openr/if/OpenrConfig.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ struct LinkMonitorConfig {
* Enable convergence performance measurement for adjacency updates.
*/
7: bool enable_perf_measurement = true;

/**
* Enable storing link events in adjacent database.
* By default, disable it.
*/
8: bool enable_link_status_measurement = false;
}

struct StepDetectorConfig {
Expand Down
30 changes: 30 additions & 0 deletions openr/if/Types.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ enum SparkNeighEvent {
NEGOTIATION_FAILURE = 8,
}

enum LinkStatusEnum {
DOWN = 0,
UP = 1,
}

/**
* Event object to track the key attribute and timestamp used for performance
* measurement.
Expand All @@ -72,6 +77,25 @@ struct PerfEvents {
1: list<PerfEvent> events;
}

/**
* Link status object to track timestamp at when link changes its status.
*/
struct LinkStatus {
1: LinkStatusEnum status;
2: i64 unixTs = 0;
}

/**
* Dictionary of link status objects with key which is interface name
* and value which is information about link status. We don't need
* to keep node name here because LinkStatusRecords is inside AdjacencyDatabase
* which already has name of the node.
*/
struct LinkStatusRecords {
@cpp.Type{template = "std::unordered_map"}
1: map<string/* interfaceName */ , LinkStatus> linkStatusMap;
}

/**
* InterfaceDb is the entire interface state for this system providing link
* status and IPv4 / IPv6 LinkLocal addresses. Spark subscribes the interface
Expand Down Expand Up @@ -219,6 +243,12 @@ struct AdjacencyDatabase {
* of a route.
*/
7: i32 nodeMetricIncrementVal = 0;

/**
* Optional attribute to store status of all links in router
* which are up and even down.
*/
8: optional LinkStatusRecords linkStatusRecords;
}

/**
Expand Down
5 changes: 5 additions & 0 deletions openr/link-monitor/InterfaceEntry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ InterfaceEntry::updateAttrs(int ifIndex, bool isUp) {
isUpdated |= ((std::exchange(info_.ifIndex, ifIndex) != ifIndex) ? 1 : 0);
isUpdated |= ((std::exchange(info_.isUp, isUp) != isUp) ? 1 : 0);

// Update timestamp only at when link status changed
if (wasUp != isUp) {
info_.statusChangeTimestamp = getUnixTimeStampMs();
}

// Look for specific case of interface state transition to DOWN
if (wasUp != isUp and wasUp) {
// Penalize backoff on transitioning to DOWN state
Expand Down
5 changes: 5 additions & 0 deletions openr/link-monitor/InterfaceEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ class InterfaceEntry final {
return info_.isUp;
}

int64_t
getStatusChangeTimestamp() const {
return info_.statusChangeTimestamp;
}

// returns const references for optimization
const std::unordered_set<folly::CIDRNetwork>&
getNetworks() const {
Expand Down
46 changes: 46 additions & 0 deletions openr/link-monitor/LinkMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ LinkMonitor::LinkMonitor(
: nodeId_(config->getNodeName()),
enablePerfMeasurement_(
*config->getLinkMonitorConfig().enable_perf_measurement()),
enableLinkStatusMeasurement_(
*config->getLinkMonitorConfig().enable_link_status_measurement()),
enableV4_(config->isV4Enabled()),
enableSegmentRouting_(config->isSegmentRoutingEnabled()),
prefixForwardingType_(*config->getConfig().prefix_forwarding_type()),
Expand Down Expand Up @@ -519,6 +521,20 @@ LinkMonitor::neighborDownEvent(const NeighborEvent& event) {
<< " is down on interface " << localIfName;
fb303::fbData->addStatValue("link_monitor.neighbor_down", 1, fb303::SUM);

// A neighbor is down, but it's not necessary that link is down.
// So we may not receive down netlink event.
// However, we consider a down neighbor event indicates link to the
// neighbor is down in link event database context.
auto linkStatus = linkStatusRecords_.linkStatusMap()->find(localIfName);
if (linkStatus != linkStatusRecords_.linkStatusMap()->end() &&
*linkStatus->second.status() == thrift::LinkStatusEnum::UP) {
// If link is up, change it to down
updateLinkStatusRecords(
localIfName,
thrift::LinkStatusEnum::DOWN,
getUnixTimeStampMs() /* now */);
}

auto areaAdjIt = adjacencies_.find(area);
// No corresponding adj, ignore.
if (areaAdjIt == adjacencies_.end()) {
Expand Down Expand Up @@ -1040,6 +1056,13 @@ LinkMonitor::buildAdjacencyDatabase(const std::string& area) {
DCHECK(!adjDb.perfEvents().has_value());
}

// Add link events if enabled
if (enableLinkStatusMeasurement_) {
adjDb.linkStatusRecords() = linkStatusRecords_;
} else {
DCHECK(!adjDb.linkStatusRecords().has_value());
}

return adjDb;
}

Expand Down Expand Up @@ -1159,6 +1182,12 @@ LinkMonitor::syncInterfaces() {
// Update link attributes
const bool wasUp = interfaceEntry->isUp();
interfaceEntry->updateAttrs(info.ifIndex, info.isUp);
// If link status changes, keep its status/timestamp into record
updateLinkStatusRecords(
info.ifName,
interfaceEntry->isUp() ? thrift::LinkStatusEnum::UP
: thrift::LinkStatusEnum::DOWN,
interfaceEntry->getStatusChangeTimestamp());

// Event logging
logLinkEvent(
Expand Down Expand Up @@ -1201,6 +1230,12 @@ LinkMonitor::processLinkEvent(fbnl::Link&& link) {
if (interfaceEntry) {
const bool wasUp = interfaceEntry->isUp();
interfaceEntry->updateAttrs(ifIndex, isUp);
// If link status changes, keep its status/timestamp into record
updateLinkStatusRecords(
ifName,
interfaceEntry->isUp() ? thrift::LinkStatusEnum::UP
: thrift::LinkStatusEnum::DOWN,
interfaceEntry->getStatusChangeTimestamp());
logLinkEvent(
interfaceEntry->getIfName(),
wasUp,
Expand Down Expand Up @@ -1955,4 +1990,15 @@ LinkMonitor::getTotalAdjacencies() {
return numAdjacencies;
}

void
LinkMonitor::updateLinkStatusRecords(
const std::string& ifName,
thrift::LinkStatusEnum ifStatus,
int64_t ifStatusChangeTimestamp) {
thrift::LinkStatus linkStatus;
linkStatus.status() = ifStatus;
linkStatus.unixTs() = ifStatusChangeTimestamp;
linkStatusRecords_.linkStatusMap()[ifName] = std::move(linkStatus);
}

} // namespace openr
11 changes: 11 additions & 0 deletions openr/link-monitor/LinkMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ class LinkMonitor final : public OpenrEventBase {
// Total # of adjacencies stored.
size_t getTotalAdjacencies();

void updateLinkStatusRecords(
const std::string& ifName,
thrift::LinkStatusEnum ifStatus,
int64_t ifStatusChangeTimestamp);

/*
* [Logging]
*
Expand Down Expand Up @@ -326,6 +331,9 @@ class LinkMonitor final : public OpenrEventBase {
const std::string nodeId_;
// enable performance measurement
const bool enablePerfMeasurement_{false};
// keep track of current status of all links in router
// with timestamps at when they change their status.
const bool enableLinkStatusMeasurement_{false};
// enable v4
bool enableV4_{false};
// enable segment routing
Expand Down Expand Up @@ -390,6 +398,9 @@ class LinkMonitor final : public OpenrEventBase {
std::unordered_map<std::string /* interface name */, InterfaceEntry>
interfaces_;

// all links with their status and timestamp at when they change status
thrift::LinkStatusRecords linkStatusRecords_;

// Container storing map of advertised prefixes - Map<prefix, list<area>>
std::map<folly::CIDRNetwork, std::vector<std::string>> advertisedPrefixes_;

Expand Down
109 changes: 108 additions & 1 deletion openr/link-monitor/tests/LinkMonitorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,23 @@ const auto adj_3_2 = createThriftAdjacency(

void
printAdjDb(const thrift::AdjacencyDatabase& adjDb) {
std::string linkStatusRecordStr = "";
if (adjDb.linkStatusRecords().has_value()) {
for (auto const& [ifName, linkStatusRecord] :
*adjDb.linkStatusRecords()->linkStatusMap()) {
linkStatusRecordStr += fmt::format(
"{} {} at {},",
ifName,
*linkStatusRecord.status() == thrift::LinkStatusEnum::UP ? "UP"
: "DOWN",
*linkStatusRecord.unixTs());
}
}
LOG(INFO) << "Node: " << *adjDb.thisNodeName()
<< ", Overloaded: " << *adjDb.isOverloaded()
<< ", Node Metric Increment: " << *adjDb.nodeMetricIncrementVal()
<< ", Label: " << *adjDb.nodeLabel() << ", area: " << *adjDb.area();
<< ", Label: " << *adjDb.nodeLabel() << ", area: " << *adjDb.area()
<< ", Link Status Records: " << linkStatusRecordStr;
for (auto const& adj : *adjDb.adjacencies()) {
LOG(INFO) << " " << *adj.otherNodeName() << "@" << *adj.ifName()
<< ", metric: " << *adj.metric() << ", label: " << *adj.adjLabel()
Expand Down Expand Up @@ -2898,6 +2911,100 @@ TEST_F(LinkMonitorTestFixture, NotAllNeighborsUpInitializationTest) {
}
}

class LinkStatusTestFixture : public LinkMonitorTestFixture {
public:
thrift::OpenrConfig
createConfig() override {
auto tConfig = LinkMonitorTestFixture::createConfig();

// enable feature that keeps track of link status changes
tConfig.link_monitor_config()->enable_link_status_measurement() = true;

return tConfig;
}
};

// Test that link status record is updated with link status change.
TEST_F(LinkStatusTestFixture, LinkStatusRecords) {
const std::string ifName = "iface_2_1";
std::string key = "adj:node-1";
std::string area = kTestingAreaName;
int64_t ifTs = INT64_MAX;

// Create an UP interface with UP neighbor.
// KvStore should have an entry in LinkStatusRecords with status UP.
{
nlEventsInjector->sendLinkEvent(ifName, 100, true /* up */);
recvAndReplyIfUpdate();
auto neighborEvent = nb2_up_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));

std::optional<thrift::Value> value = getPublicationValueForKey(key);
auto adjDb = readThriftObjStr<thrift::AdjacencyDatabase>(
value->value().value(), serializer);
printAdjDb(adjDb);
ASSERT_TRUE(adjDb.linkStatusRecords().has_value());
auto linkStatusMap = adjDb.linkStatusRecords()->linkStatusMap();
EXPECT_EQ(linkStatusMap->size(), 1);
EXPECT_EQ(linkStatusMap->count(ifName), 1);
EXPECT_EQ(*linkStatusMap->at(ifName).status(), thrift::LinkStatusEnum::UP);
ifTs = *linkStatusMap->at(ifName).unixTs();
}

// Flip the interface DOWN and so neighbor is DOWN.
// KvStore should still have one entry in LinkStatusRecords, but status is
// now DOWN and timestamp is larger.
{
nlEventsInjector->sendLinkEvent(ifName, 100, false /* down */);
recvAndReplyIfUpdate();
auto neighborEvent = nb2_down_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));

std::optional<thrift::Value> value = getPublicationValueForKey(key);
auto adjDb = readThriftObjStr<thrift::AdjacencyDatabase>(
value->value().value(), serializer);
printAdjDb(adjDb);
ASSERT_TRUE(adjDb.linkStatusRecords().has_value());
auto linkStatusMap = adjDb.linkStatusRecords()->linkStatusMap();
EXPECT_EQ(linkStatusMap->size(), 1);
EXPECT_EQ(linkStatusMap->count(ifName), 1);
EXPECT_EQ(
*linkStatusMap->at(ifName).status(), thrift::LinkStatusEnum::DOWN);
EXPECT_GE(*linkStatusMap->at(ifName).unixTs(), ifTs);
ifTs = *linkStatusMap->at(ifName).unixTs();
}

// Flip the interface back to UP with UP neighbor. And then, push
// neighbor DOWN but link to it is still UP (i.e., neighbor crashes).
// KvStore should still have one entry in LinkStatusRecords, but status is
// now DOWN (last status) and timestamp is the largest.
{
nlEventsInjector->sendLinkEvent(ifName, 100, true /* up */);
recvAndReplyIfUpdate();
auto upNeighborEvent = nb2_up_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(upNeighborEvent)})));

auto downNeighborEvent = nb2_down_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(downNeighborEvent)})));

std::optional<thrift::Value> value = getPublicationValueForKey(key);
auto adjDb = readThriftObjStr<thrift::AdjacencyDatabase>(
value->value().value(), serializer);
printAdjDb(adjDb);
ASSERT_TRUE(adjDb.linkStatusRecords().has_value());
auto linkStatusMap = adjDb.linkStatusRecords()->linkStatusMap();
EXPECT_EQ(linkStatusMap->size(), 1);
EXPECT_EQ(linkStatusMap->count(ifName), 1);
EXPECT_EQ(
*linkStatusMap->at(ifName).status(), thrift::LinkStatusEnum::DOWN);
EXPECT_GE(*linkStatusMap->at(ifName).unixTs(), ifTs);
}
}

int
main(int argc, char* argv[]) {
// Parse command line flags
Expand Down

0 comments on commit ccb86a5

Please sign in to comment.