diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp new file mode 100644 index 000000000..af03f281d --- /dev/null +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "fbpcs/pc_translator/PCTranslator.h" +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fbpcs/emp_games/common/Csv.h" +#include "folly/String.h" + +namespace pc_translator { + +/* Encodes the dataset at inputDatasetPath: picks the instruction set names enabled in pcsFeatures_, loads them, and transforms the dataset using the first loaded set. Returns the path of the transformed file, or inputDatasetPath itself when no instruction set is enabled. */ std::string PCTranslator::encode(const std::string& inputDatasetPath) { + auto validInstructionSetNames = + PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); + auto pcInstructionSets = + PCTranslator::retrieveInstructionSets(validInstructionSetNames); + if (pcInstructionSets.empty()) { + // No instruction set is enabled for this run; return the input dataset path unchanged.
+ return inputDatasetPath; + } + return PCTranslator::transformDataset( + inputDatasetPath, pcInstructionSets.front()); +} + +/* Decoding is not implemented yet: every call throws std::runtime_error. */ std::string PCTranslator::decode( + const std::string& /* aggregatedOutputDataset */) { + throw std::runtime_error("Unimplemented"); +} + +/* Loads and parses one instruction set per entry of instructionSetNames. Each file path is formed from instructionSetBasePath_, the cleaned name and the .json suffix. */ std::vector> +PCTranslator::retrieveInstructionSets( + std::vector& instructionSetNames) { + std::vector> pcInstructionSets; + /* Iterates by value on purpose: the loop edits its own copy of each name. */ for (auto instructionSetName : instructionSetNames) { + /* Remove every single-quote character, then every space, before the name is used as a file name. */ instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), '\''), + instructionSetName.end()); + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), ' '), + instructionSetName.end()); + auto file_path = instructionSetBasePath_ + instructionSetName + ".json"; + auto contents = fbpcf::io::FileIOWrappers::readFile(file_path); + pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents)); + } + return pcInstructionSets; +} + +/* Splits the comma-separated pcsFeatures list and returns the entries that contain the substring pc_instr. Entries pass through a std::set first, so duplicates collapse and the result follows the set ordering. */ std::vector PCTranslator::retrieveInstructionSetNamesForRun( + const std::string& pcsFeatures) { + std::set enabledFeatureFlags; + folly::splitTo( + ',', + pcsFeatures, + std::inserter(enabledFeatureFlags, enabledFeatureFlags.begin()), + /* presumably the ignoreEmpty flag of folly::splitTo - TODO confirm */ true); + + std::vector validPCInstructionSets; + std::copy_if( + enabledFeatureFlags.begin(), + enabledFeatureFlags.end(), + std::back_inserter(validPCInstructionSets), + [](const std::string& feature) { + return feature.find("pc_instr") != std::string::npos; + }); + + return validPCInstructionSets; +} + +/* Rewrites the CSV at inputDatasetPath: columns named in the group-by list of pcInstructionSet are dropped and replaced by a single breakdown_id column holding the index the encoder produced for that row. Writes transformed_publisher_input.csv into the directory of the input file and returns that path. */ std::string PCTranslator::transformDataset( + const std::string& inputDatasetPath, + std::shared_ptr pcInstructionSet) { + // Parse the input CSV, separating group-by columns from pass-through columns + auto lineNo = 0; + std::vector> inputColums; + std::vector outputHeader; + std::vector> outputContent; + private_measurement::csv::readCsv( + inputDatasetPath, + [&](const std::vector& header, + const std::vector& parts) { + std::vector inputColumnPerRow; + std::string column; + std::uint32_t value; + bool found = false; + std::vector
+ outputContentPerRow; + for (std::vector::size_type i = 0; i < header.size(); + ++i) { + column = header[i]; + /* atoi runs on every cell, but the result is only used for group-by columns. */ value = std::atoi(parts[i].c_str()); + found = + (std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column) != pcInstructionSet->getGroupByIds().end()); + if (found) { + inputColumnPerRow.push_back(value); + } else { + /* Pass-through header names are collected once, while handling the first row. */ if (lineNo == 0) { + outputHeader.push_back(header[i]); + } + outputContentPerRow.push_back(parts[i]); + } + } + + inputColums.push_back(inputColumnPerRow); + outputContent.push_back(outputContentPerRow); + lineNo++; + }); + + /* The encoder is created with an empty filter list (vector built with size 0). */ auto filters = std::make_unique< + std::vector>>(0); + std::unique_ptr encoder = + std::make_unique( + std::move(filters)); + + auto encodedIndexes = encoder->generateORAMIndexes(inputColums); + + /* Keep the input path up to and including its last slash. Without any slash rfind returns npos, and adding 1 wraps to 0, so dir is empty. */ auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1); + auto output_dataset_path = dir + "transformed_publisher_input.csv"; + + PCTranslator::putOutputData( + output_dataset_path, outputHeader, outputContent, encodedIndexes); + return output_dataset_path; +} + +/* Appends the breakdown_id column to the header and one encoded index to every row, then writes the CSV to output_dataset_path. outputHeader and outputContent are modified in place, and the header is extended before the size check runs. Throws std::runtime_error when the number of rows differs from the number of encoded indexes. */ void PCTranslator::putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes) { + outputHeader.push_back("breakdown_id"); + + if (outputContent.size() != encodedIndexes.size()) { + throw std::runtime_error( + "Encoded index vector size should match the input vector size."); + } + + for (std::vector::size_type i = 0; i < encodedIndexes.size(); + ++i) { + auto indexVec = std::to_string(encodedIndexes[i]); + outputContent[i].push_back(indexVec); + } + + private_measurement::csv::writeCsv( + output_dataset_path, outputHeader, outputContent); +} + +/* Parses the JSON text in instructionSet and wraps the resulting PCInstructionSet in a shared pointer. NOTE(review): the argument is passed through std::move, so callers should not rely on its contents afterwards - confirm whether folly::parseJson actually consumes it. */ std::shared_ptr PCTranslator::parseInstructionSet( + std::string& instructionSet) { + return std::make_shared(PCInstructionSet::fromDynamic( + folly::parseJson(std::move(instructionSet)))); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/PCTranslator.h
b/fbpcs/pc_translator/PCTranslator.h new file mode 100644 index 000000000..d0a978855 --- /dev/null +++ b/fbpcs/pc_translator/PCTranslator.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +namespace pc_translator { + +/* + * This class contains the functions PC Translator needs during an actual run, + * i.e. retrieving the PC instruction sets, filtering them per active GK for + * the run, and encoding and decoding the input dataset files as per the + * instruction set. + */ +class PCTranslator { + public: + /* pcsFeatures is the comma-separated feature list; instruction sets are loaded from the default base path. */ explicit PCTranslator(const std::string& pcsFeatures) + : pcsFeatures_(pcsFeatures) {} + + /* Same as the one-argument form, but instruction sets are loaded from instructionSetBasePath. */ explicit PCTranslator( + const std::string& pcsFeatures, + const std::string& instructionSetBasePath) + : pcsFeatures_(pcsFeatures), + instructionSetBasePath_(instructionSetBasePath) {} + + /* Method to encode the input dataset with the first instruction set enabled for the run. Returns the path of the transformed dataset, or the input path unchanged when no instruction set is enabled. */ std::string encode(const std::string& inputDataset); + + /* + * Method to decode final aggregated output with the encoded breakdown Ids as + * the keys. This method will decode the breakdown Ids to original group Id + * values and format the aggregated output as per the new keys. Output of this + * method would be the path of the decoded aggregated output. Not implemented yet: the definition in PCTranslator.cpp throws std::runtime_error.
+ */ + std::string decode(const std::string& aggregatedOutputDataset); + + private: + /* Comma-separated feature list given at construction. */ std::string pcsFeatures_; + /* Prefix joined with the instruction set name and the .json suffix when an instruction set is loaded. */ std::string instructionSetBasePath_ = + "https://pc-translator.s3.us-west-2.amazonaws.com/"; + /* Reads and parses one instruction set per name. */ std::vector> retrieveInstructionSets( + std::vector& instructionSetNames); + /* Returns the entries of pcsFeatures that contain pc_instr. */ std::vector retrieveInstructionSetNamesForRun( + const std::string& pcsFeatures); + /* Parses instruction set JSON text into a PCInstructionSet. */ std::shared_ptr parseInstructionSet( + std::string& instructionSet); + /* Replaces the group-by columns of the input CSV with a breakdown_id column and returns the path of the written file. */ std::string transformDataset( + const std::string& inputData, + std::shared_ptr pcInstructionSet); + + /* Appends breakdown_id to the header and rows (both modified in place) and writes the CSV. */ void putOutputData( + const std::string& output_dataset_path, + std::vector& outputHeader, + std::vector>& outputContent, + const std::vector& encodedIndexes); +}; + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/FilterConstraint.cpp b/fbpcs/pc_translator/input_processing/FilterConstraint.cpp new file mode 100644 index 000000000..6bc8a0dac --- /dev/null +++ b/fbpcs/pc_translator/input_processing/FilterConstraint.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree.
+ */ + +#include "fbpcs/pc_translator/input_processing/FilterConstraint.h" + +#include +#include +#include +#include + +namespace pc_translator { +/* Stores the three fields as given; no validation is performed. */ FilterConstraint::FilterConstraint( + const std::string& name, + const std::string& type, + int value) + : name_(name), type_(type), value_(value) {} + +/* Returns a copy of the stored name. */ std::string FilterConstraint::getName() const { + return name_; +} + +/* Returns a copy of the stored constraint type. */ std::string FilterConstraint::getType() const { + return type_; +} + +/* Returns the stored value. */ int FilterConstraint::getValue() const { + return value_; +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/FilterConstraint.h b/fbpcs/pc_translator/input_processing/FilterConstraint.h new file mode 100644 index 000000000..00cbbc189 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/FilterConstraint.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace pc_translator { + +/* + * Class to store each filter constraint included in the PC instruction set. + */ +class FilterConstraint { + public: + FilterConstraint(const std::string& name, const std::string& type, int value); + + /* + * Name of the filter constraint, i.e. the field on which this filter is to be + * applied. + */ + std::string getName() const; + + /* + * Constraint type, e.g. LT, LTE, EQ, NEQ etc. + */ + std::string getType() const; + + /* Integer value stored for the constraint - presumably the operand the field is compared against; TODO confirm. */ int getValue() const; + + private: + std::string name_; + std::string type_; + int value_; +}; + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp b/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp new file mode 100644 index 000000000..bd1c995d8 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/PCInstructionSet.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +#include +#include +#include +#include +#include + +namespace pc_translator { + +/* Returns a reference to the stored group-by Ids; it is valid only while this object is alive. */ const std::vector& PCInstructionSet::getGroupByIds() const { + return groupByIds; +} + +/* Returns a reference to the stored filter constraints; it is valid only while this object is alive. */ const std::vector& PCInstructionSet::getFilterConstraints() + const { + return filterConstraints; +} + +/* Builds a PCInstructionSet from parsed JSON. Reads the aggregated_metrics entry of obj: every element of its group_by list becomes a group-by Id, and every element under its filter object becomes a FilterConstraint named after the enclosing key, with constraint_type read as a string and value read through asInt(). Missing keys are not handled here. */ PCInstructionSet PCInstructionSet::fromDynamic(const folly::dynamic& obj) { + PCInstructionSet pcInstructionSet; + auto aggregationConfig = obj["aggregated_metrics"]; + auto groupByFields = aggregationConfig["group_by"]; + + /* NOTE(review): iterates by value, so each element is copied before asString() is called. */ for (auto groupByField : groupByFields) { + pcInstructionSet.groupByIds.push_back(groupByField.asString()); + } + + auto filterConstraintsFields = aggregationConfig["filter"]; + + /* One FilterConstraint per (field name, constraint) pair; constraints are appended in the order items() yields the field names. */ for (auto& [key, constraints] : filterConstraintsFields.items()) { + std::string name = key.asString(); + for (auto constraint : constraints) { + auto constraintType = constraint["constraint_type"].asString(); + auto constraintValue = constraint["value"].asInt(); + FilterConstraint filterConstraint(name, constraintType, constraintValue); + pcInstructionSet.filterConstraints.push_back(filterConstraint); + } + } + + return pcInstructionSet; +} + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/input_processing/PCInstructionSet.h b/fbpcs/pc_translator/input_processing/PCInstructionSet.h new file mode 100644 index 000000000..01e710f54 --- /dev/null +++ b/fbpcs/pc_translator/input_processing/PCInstructionSet.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include "fbpcs/pc_translator/input_processing/FilterConstraint.h" + +namespace pc_translator { + +/* + * Class to store a PC instruction set. It contains a list of group-by Ids as + * well as a list of filter constraints. + */ +class PCInstructionSet { + public: + /* + * Method to get all group-by Ids from the PC instruction set. + */ + const std::vector& getGroupByIds() const; + + /* + * Method to get all filter constraints from the PC instruction set. + */ + const std::vector& getFilterConstraints() const; + + /* + * Method to parse a folly::dynamic object and create a PCInstructionSet instance. + */ + static PCInstructionSet fromDynamic(const folly::dynamic& obj); + + private: + std::vector groupByIds; + std::vector filterConstraints; + + /* NOTE(review): declared here, but no definition appears in PCInstructionSet.cpp in this change - confirm it is still needed. */ void parseJson(const std::string& json); +}; + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp new file mode 100644 index 000000000..7febfea45 --- /dev/null +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree.
+ */ + +#include +#include +#include "../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/PCTranslator.h" + +namespace pc_translator { +/* Fixture for PCTranslator::encode: stages the publisher CSV under /tmp before each test and removes the staged and generated files afterwards. */ class TestPCTranslator : public ::testing::Test { + public: + protected: + std::string pcs_features_; + std::string test_instruction_set_base_path_; + std::string test_publisher_input_path_; + std::string test_transformed_output_path_; + std::string expected_transformed_output_path_; + + /* Sets the feature list (only the last entry contains pc_instr), points the instruction set base path at the input_processing directory next to this file, and copies publisher_unittest.csv to /tmp. */ void SetUp() override { + pcs_features_ = + "'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'"; + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + test_instruction_set_base_path_ = baseDir + "input_processing/"; + test_publisher_input_path_ = "/tmp/publisher_unittest.csv"; + test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv"; + expected_transformed_output_path_ = + baseDir + "expected_transformed_publisher_input.csv"; + auto contents = + fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv"); + fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents); + } + + /* Removes both /tmp files; the return values of std::remove are ignored. */ void TearDown() override { + std::remove(test_publisher_input_path_.c_str()); + std::remove(test_transformed_output_path_.c_str()); + } +}; + +/* Encodes the staged CSV and checks both the returned path and the written contents against the expected fixture. */ TEST_F(TestPCTranslator, TestEncode) { + auto pcTranslator = std::make_shared( + pcs_features_, test_instruction_set_base_path_); + auto outputPath = pcTranslator->encode(test_publisher_input_path_); + auto contents = fbpcf::io::FileIOWrappers::readFile(outputPath); + auto expectedContents = + fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_); + EXPECT_EQ(outputPath, test_transformed_output_path_); + EXPECT_EQ(contents, expectedContents); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv new file mode 100644 index 000000000..247407907 --- /dev/null
+++ b/fbpcs/pc_translator/tests/expected_transformed_publisher_input.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp,breakdown_id +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0,2 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3 +a87ff679a2f3e71d9181a67b7542122c,0,0,0,0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5 +d3d9446802a44259755d38e6d163e820,0,0,0,0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0,0 diff --git a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp new file mode 100644 index 000000000..d4ea3fd57 --- /dev/null +++ b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include + +#include +#include +#include "../../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" + +namespace pc_translator { +/* Fixture that resolves the path of the JSON instruction set stored next to this test file. */ class TestPCInstructionSet : public ::testing::Test { + public: + protected: + std::string testInstructionSetPath_; + + void SetUp() override { + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + testInstructionSetPath_ = baseDir + "pc_instr_test_instruction_set.json"; + } +}; + +/* Parses the fixture and checks the counts (2 group-by Ids, 3 filter constraints) and the first constraint. NOTE(review): the fixture lists age before gender under filter, yet index 0 is expected to be gender, so this depends on the iteration order of folly::dynamic::items() - confirm that order is stable. */ TEST_F(TestPCInstructionSet, TestStandardWorkflowTest) { + auto pcInstructionSet = std::make_shared( + PCInstructionSet::fromDynamic(folly::parseJson( + fbpcf::io::FileIOWrappers::readFile(testInstructionSetPath_)))); + auto groupByIds = pcInstructionSet->getGroupByIds(); + auto filterConstraints = pcInstructionSet->getFilterConstraints(); + EXPECT_EQ(groupByIds.size(), 2); + EXPECT_EQ(filterConstraints.size(), 3); + EXPECT_EQ(filterConstraints[0].getName(), "gender"); + EXPECT_EQ(filterConstraints[0].getType(), "EQ"); + EXPECT_EQ(filterConstraints[0].getValue(), 0); +} + +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json new file mode 100644 index 000000000..7fa183d2f --- /dev/null +++ b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json @@ -0,0 +1,41 @@ +{ + "publisher_input": { + "num_impressions": "int", + "num_clicks": "int", + "total_spend": "int", + "opportunity_timstamp": "int", + "test_flag": "int", + "age": "int", + "gender": "Optional[int]", + "breakdown_id": "Optional[int]" + }, + "partner_input": { + "value": "int", + "event_timestamp": "int", + "partner_cohort_id": "Optional[int]" + }, + "aggregated_metrics": { + "filter": { + "age": [ + { + "constraint_type": "GTE", + "value": "25" + }, + { + "constraint_type": "LTE", + "value": "40" + } + ], + "gender": [ + {
"constraint_type": "EQ", + "value": "0" + } + ] + }, + "group_by": [ + "age", + "gender" + ] + } +} diff --git a/fbpcs/pc_translator/tests/publisher_unittest.csv b/fbpcs/pc_translator/tests/publisher_unittest.csv new file mode 100644 index 000000000..691ecff30 --- /dev/null +++ b/fbpcs/pc_translator/tests/publisher_unittest.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp, age, gender +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430, 25, 0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401, 26, 1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0, 44, 0 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0, 23, 0 +a87ff679a2f3e71d9181a67b7542122c,0,0,0, 25, 0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461, 24, 1 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052, 25, 1 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831, 26, 0 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530, 50, 0 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972, 25, 1 +d3d9446802a44259755d38e6d163e820,0,0,0, 25, 0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0, 25, 0