Introduced dp_noise_mechanisms.h/cc, which contain code that generates noise mechanisms for both open- and closed-domain histograms

Much of the logic came from dp_open_domain_histogram.cc. The function CreateDPHistogramBundle is used by both DPClosedDomainHistogram::Report() and DPOpenDomainHistogram::Report(). Also made some changes to DPGroupByFactory::CreateInternal to ensure preconditions. PiperOrigin-RevId: 661452978
google-parfait · Aug 9, 2024 · 3d8fa0d · 3d8fa0d
1 parent 3f15f0f
commit 3d8fa0d
Show file tree

Hide file tree

Showing 11 changed files with 882 additions and 308 deletions.
diff --git a/RELEASE.md b/RELEASE.md
@@ -8,6 +8,24 @@ and this project adheres to
 
 ## Unreleased
 
+### Added
+
+*   The `dp_noise_mechanisms` header and source files: contain functions that
+    generate a `differential_privacy::LaplaceMechanism` or
+    `differential_privacy::GaussianMechanism`, based upon privacy parameters and
+    norm bounds. Each of these functions returns a `DPHistogramBundle` struct,
+    which contains the mechanism, the threshold needed for DP open-domain
+    histograms, and a boolean indicating whether Laplace noise was used.
+
+### Changed
+
+*   `DPClosedDomainHistogram::Report` and `DPOpenDomainHistogram::Report`: both
+    now use the `DPHistogramBundle` structs produced by the
+    `CreateDPHistogramBundle` function in `dp_noise_mechanisms`.
+*   `DPGroupByFactory::CreateInternal`: when `delta` is not provided, check
+    that the norm bounds needed to compute L1 sensitivity (for the Laplace
+    mechanism) are provided.
+
 ## Release 0.84.0
 
 ### Added

diff --git a/tensorflow_federated/cc/core/impl/aggregation/core/BUILD b/tensorflow_federated/cc/core/impl/aggregation/core/BUILD
@@ -119,6 +119,33 @@ cc_library(
     deps = [":tensor"],
 )
 
+cc_library(
+    name = "dp_noise_mechanisms",
+    srcs = ["dp_noise_mechanisms.cc"],
+    hdrs = [
+        "dp_noise_mechanisms.h",
+    ],
+    deps = [
+        ":dp_fedsql_constants",
+        "//tensorflow_federated/cc/core/impl/aggregation/base",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_cc_differential_privacy//algorithms:numerical-mechanisms",
+        "@com_google_cc_differential_privacy//algorithms:partition-selection",
+    ],
+)
+
+cc_test(
+    name = "dp_noise_mechanisms_test",
+    srcs = ["dp_noise_mechanisms_test.cc"],
+    deps = [
+        ":dp_fedsql_constants",
+        ":dp_noise_mechanisms",
+        "//tensorflow_federated/cc/core/impl/aggregation/base",
+        "//tensorflow_federated/cc/testing:oss_test_main",
+        "//tensorflow_federated/cc/testing:status_matchers",
+    ],
+)
+
 # TODO: b/352020454 - Create one library per cc & hh pair. Make them aggregation_cores deps.
 cc_library(
     name = "aggregation_cores",
@@ -148,6 +175,7 @@ cc_library(
         ":agg_core_cc_proto",
         ":aggregator",
         ":dp_fedsql_constants",
+        ":dp_noise_mechanisms",
         ":fedsql_constants",
         ":intrinsic",
         ":tensor",
@@ -159,11 +187,9 @@ cc_library(
         "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/random",
-        "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
         "@com_google_cc_differential_privacy//algorithms:numerical-mechanisms",
-        "@com_google_cc_differential_privacy//algorithms:partition-selection",
     ],
     alwayslink = 1,
 )

diff --git a/tensorflow_federated/cc/core/impl/aggregation/core/dp_closed_domain_histogram.cc b/tensorflow_federated/cc/core/impl/aggregation/core/dp_closed_domain_histogram.cc
@@ -22,11 +22,14 @@
 #include <vector>
 
 #include "absl/container/fixed_array.h"
+#include "algorithms/numerical-mechanisms.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/base/monitoring.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/agg_core.pb.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/composite_key_combiner.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/datatype.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/dp_composite_key_combiner.h"
+#include "tensorflow_federated/cc/core/impl/aggregation/core/dp_fedsql_constants.h"
+#include "tensorflow_federated/cc/core/impl/aggregation/core/dp_noise_mechanisms.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/group_by_aggregator.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/input_tensor_list.h"
 #include "tensorflow_federated/cc/core/impl/aggregation/core/intrinsic.h"
@@ -41,23 +44,26 @@
 
 namespace tensorflow_federated {
 namespace aggregation {
+using differential_privacy::NumericalMechanism;
+
 namespace {
 // Given a tensor containing a column of aggregates and an ordinal, push the
 // aggregate associated with that ordinal to the back of a MutableVectorData
 // container. If the ordinal is kNoOrdinal, push 0 instead.
+// Adds noise if a mechanism is provided.
 template <typename T>
 void CopyAggregateFromColumn(const Tensor& column_of_aggregates,
-                             int64_t ordinal, MutableVectorData<T>& container) {
-  // Get the aggregate we will be copying over if it exists.
-  // Future CL will initialize value to a random number instead of 0.
-  T value = 0;
+                             int64_t ordinal, MutableVectorData<T>& container,
+                             NumericalMechanism* mechanism) {
+  T value = (mechanism == nullptr) ? 0 : mechanism->AddNoise(/*result=*/0);
   if (ordinal != kNoOrdinal) {
     value += column_of_aggregates.AsSpan<T>()[ordinal];
   }
 
-  // Add to the container.
+  // Add (possibly noisy) value to the container.
   container.push_back(value);
 }
+
 }  // namespace
 
 DPClosedDomainHistogram::DPClosedDomainHistogram(
@@ -135,6 +141,35 @@ StatusOr<OutputTensorList> DPClosedDomainHistogram::Report() && {
     domain_size *= domain_tensor.num_elements();
   }
 
+  // Make a noise mechanism for each aggregation.
+  std::vector<std::unique_ptr<NumericalMechanism>> mechanisms;
+  for (int i = 0; i < intrinsics().size(); ++i) {
+    const Intrinsic& intrinsic = intrinsics()[i];
+    // Do not bother making mechanism if epsilon is too large.
+    if (epsilon_per_agg_ >= kEpsilonThreshold) {
+      mechanisms.push_back(nullptr);
+      laplace_was_used_.push_back(false);
+      continue;
+    }
+
+    // Get norm bounds for the ith aggregation.
+    double linfinity_bound =
+        intrinsic.parameters[kLinfinityIndex].CastToScalar<double>();
+    double l1_bound = intrinsic.parameters[kL1Index].CastToScalar<double>();
+    double l2_bound = intrinsic.parameters[kL2Index].CastToScalar<double>();
+
+    // Create a noise mechanism out of those norm bounds and privacy params.
+    TFF_ASSIGN_OR_RETURN(
+        DPHistogramBundle noise_mechanism,
+        CreateDPHistogramBundle(epsilon_per_agg_, delta_per_agg_, l0_bound_,
+                                linfinity_bound, l1_bound, l2_bound,
+                                /*open_domain=*/false));
+    mechanisms.push_back(std::move(noise_mechanism.mechanism));
+
+    // Record whether Laplace will be used.
+    laplace_was_used_.push_back(noise_mechanism.use_laplace);
+  }
+
   // Create MutableVectorData containers, one for each output tensor, that are
   // each big enough to hold domain_size elements.
   // If all output tensors had the same type like int64_t we could create an
@@ -154,7 +189,13 @@ StatusOr<OutputTensorList> DPClosedDomainHistogram::Report() && {
   do {
     // Each composite key is associated with a row of the output. i-th entry of
     // that row will be written to i-th entry of noisy_aggregate_data.
+
+    // Maintain the index of the next key to output.
     int64_t key_to_output = 0;
+    // Maintain the index of the next mechanism to use.
+    int64_t mech_to_use = 0;
+
+    // Loop to populate the row of the output for the current composite key.
     for (int64_t i = 0; i < noisy_aggregate_data.size(); i++) {
       // Get the TensorData container we will be writing to (a column).
       TensorData& container = *(noisy_aggregate_data[i]);
@@ -198,7 +239,11 @@ StatusOr<OutputTensorList> DPClosedDomainHistogram::Report() && {
             column_of_aggregates.dtype(), T,
             CopyAggregateFromColumn<T>(
                 column_of_aggregates, ordinal,
-                dynamic_cast<MutableVectorData<T>&>(container)));
+                dynamic_cast<MutableVectorData<T>&>(container),
+                mechanisms[mech_to_use].get()));
+
+        // Move on to the next mechanism.
+        mech_to_use++;
       }
     }
   } while (IncrementDomainIndices(domain_indices));