// NOTE(review): this file is a unified git diff, not a C++ translation unit.
// In this copy the patch is collapsed onto a handful of very long lines and
// the contents of angle brackets appear to be missing (bare `#include`,
// `std::vector>`, `std::make_shared(`), so it neither applies nor compiles as
// shown. This commentary sits ahead of the first `diff --git` header, where
// patch tools conventionally skip leading text; the patch text itself is
// left untouched.
//
// Files touched by the patch:
//   - fbpcs/pc_translator/PCTranslator.cpp
//   - fbpcs/pc_translator/PCTranslator.h
//   - fbpcs/pc_translator/tests/TestPCTranslator.cpp (new file)
//   - fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp
//   - tests/input_processing/test_instruction_set.json, renamed to
//     pc_instr_test_instruction_set.json with one EQ / "1" constraint removed
//   - fbpcs/pc_translator/tests/publisher_unittest.csv (new: header + 12 rows)
//
// What the patch changes:
//   - PCTranslator::encode() stops discarding the result and returns
//     transformDataset(inputDataset, pcInstructionSets.front()).
//   - retrieveInstructionSets() erases every single-quote and every space
//     character from each instruction set name, then reads
//     instructionSetBasePath_ + name + ".json".
//   - retrieveInstructionSetNamesForRun() keeps a feature flag when it
//     contains "pc_instr" anywhere (find != npos); before the patch the flag
//     had to start with it (find == 0).
//   - transformDataset() now returns std::string and takes one instruction
//     set. It reads the CSV through private_measurement::csv::readCsv, keeps
//     only the columns whose header name is found in getGroupByIds(),
//     converts each kept cell with std::atoi, builds an encoder over an empty
//     filter vector, calls generateORAMIndexes() on the collected rows, and
//     returns "" (the TODO says the indexes still have to be appended to the
//     publisher output).
//   - PCTranslator.h gains a two-argument constructor (feature string plus
//     instruction set base path) and drops the block comment that documented
//     encode().
//   - TestPCTranslator.TestEncode builds a translator from the test feature
//     string and base path, calls encode() on the CSV path, and expects "".
//   - TestPCInstructionSet now loads pc_instr_test_instruction_set.json,
//     expects 3 filter constraints instead of 4, and no longer includes
//     folly/Random.h.
//
// NOTE(review): points to confirm before landing:
//   - Member names do not agree across the patch as shown. The new
//     constructor initialises pcs_features_ and instruction_set_base_path_,
//     the private members are declared as pcsfeatures_ and
//     instructionSetBasePath, the unchanged one-argument constructor
//     initialises pcsFeatures_, and the .cpp reads pcsFeatures_ and
//     instructionSetBasePath_. Unless other declarations exist outside this
//     patch, these need to be unified.
//   - pcInstructionSets.front() is called without an emptiness check; the
//     vector has one entry per feature flag that passed the "pc_instr"
//     filter, so it is empty when no flag matches. Confirm callers always
//     pass at least one such flag, or guard the call.
//   - The CSV callback indexes parts[i] for every i below header.size();
//     presumably readCsv guarantees parts is at least as long as header --
//     verify, since a short row would otherwise be read out of bounds.
//   - std::find is given getGroupByIds().begin() and getGroupByIds().end()
//     from separate calls, and the result is compared against a third call.
//     The return type is not visible here; if it is returned by value these
//     iterators belong to different temporaries. Confirm it returns a
//     reference, or bind the result to a local first.
//   - publisher_unittest.csv writes " age" and " gender" with a leading
//     space. Whether readCsv trims header cells is not visible here; if it
//     does not, comparing against group-by ids spelled without the space
//     would never match. TODO confirm.
//   - The substring match on "pc_instr" also accepts flags that merely
//     contain the text; presumably intended so that quoted entries such as
//     'pc_instr_test_instruction_set' pass -- confirm.
//   - std::atoi yields 0 for non-numeric text, so a malformed cell is
//     indistinguishable from a real 0.
//   - lineNo is incremented but never read, and encodedIndexes is computed
//     but not used yet (see the TODO, where "enodedIndexes" is presumably a
//     typo for encodedIndexes).
//   - remove(...) is called unqualified; presumably it resolves to
//     std::remove through argument-dependent lookup -- confirm on all
//     supported toolchains or qualify it.
diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index 6c6ee7e07..53381d5c0 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -10,7 +10,12 @@ #include #include +#include +#include +#include +#include #include +#include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { @@ -20,8 +25,8 @@ std::string PCTranslator::encode(const std::string& inputDataset) { PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); - PCTranslator::transformDataset(inputDataset, pcInstructionSets); - return ""; + return PCTranslator::transformDataset( + inputDataset, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -34,7 +39,13 @@ PCTranslator::retrieveInstructionSets( std::vector& instructionSetNames) { std::vector> pcInstructionSets; for (auto instructionSetName : instructionSetNames) { - auto file_path = instructionSetBasePath + instructionSetName + ".json"; + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), '\''), + instructionSetName.end()); + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), ' '), + instructionSetName.end()); + auto file_path = instructionSetBasePath_ + instructionSetName + ".json"; auto contents = fbpcf::io::FileIOWrappers::readFile(file_path); pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents)); } @@ -55,16 +66,52 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( enabledFeatureFlags.begin(), enabledFeatureFlags.end(), std::back_inserter(validPCInstructionSets), - [](const std::string& feature) { return feature.find("pc_instr") == 0; }); + [](const std::string& feature) { + return feature.find("pc_instr") != std::string::npos; + }); return validPCInstructionSets; } -void PCTranslator::transformDataset( - const std::string& /* 
inputData */, - const std::vector>& - pcInstructionSets) { - throw std::runtime_error("Unimplemented"); +std::string PCTranslator::transformDataset( + const std::string& inputData, + std::shared_ptr pcInstructionSet) { + // Parse the input CSV + auto lineNo = 0; + std::vector> inputColums; + private_measurement::csv::readCsv( + inputData, + [&](const std::vector& header, + const std::vector& parts) { + std::vector inputColumnPerRow; + for (std::vector::size_type i = 0; i < header.size(); + ++i) { + auto column = header[i]; + auto value = std::atoi(parts[i].c_str()); + auto iter = std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column); + if (iter != pcInstructionSet->getGroupByIds().end()) { + inputColumnPerRow.push_back(value); + } + } + + inputColums.push_back(inputColumnPerRow); + lineNo++; + }); + + auto filters = std::make_unique< + std::vector>>(0); + std::unique_ptr encoder = + std::make_unique( + std::move(filters)); + + auto encodedIndexes = encoder->generateORAMIndexes(inputColums); + + // TODO : Append the enodedIndexes at the end of publisher output and return + // output path. + return ""; } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index ed59d6915..cebf5adb7 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -26,12 +26,13 @@ class PCTranslator { explicit PCTranslator(const std::string& pcsFeatures) : pcsFeatures_(pcsFeatures) {} - /* - * Method to encode the configurable fields in input dataset as per the active - * pc instruction sets for the run. This method will output the path of - * transformed input dataset, which can be used in further PC run. 
- */ - std::string encode(const std::string& inputDataset); + explicit PCTranslator( + const std::string& pcs_features, + const std::string& instruction_set_base_path) + : pcs_features_(pcs_features), + instruction_set_base_path_(instruction_set_base_path) {} + + std::string encode(const std::string& input_dataset); /* * Method to decode final aggregated output with the encoded breakdown Ids as @@ -42,8 +43,8 @@ class PCTranslator { std::string decode(const std::string& aggregatedOutputDataset); private: - std::string pcsFeatures_; - const std::string instructionSetBasePath = + std::string pcsfeatures_; + std::string instructionSetBasePath = "https://pc-translator.s3.us-west-2.amazonaws.com/"; std::vector> retrieveInstructionSets( std::vector& instructionSetNames); @@ -51,10 +52,9 @@ class PCTranslator { const std::string& pcsFeatures); std::shared_ptr parseInstructionSet( const std::string& instructionSet); - void transformDataset( + std::string transformDataset( const std::string& input_data, - const std::vector>& - pcInstructionSets); + std::shared_ptr pcInstructionSet); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp new file mode 100644 index 000000000..f182efa9a --- /dev/null +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include "../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/PCTranslator.h" + +namespace pc_translator { +class TestPCTranslator : public ::testing::Test { + public: + protected: + std::string pcs_features_; + std::string test_instruction_set_base_path_; + std::string test_publisher_input_path_; + + void SetUp() override { + pcs_features_ = + "'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'"; + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + test_instruction_set_base_path_ = baseDir + "input_processing/"; + test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + } +}; + +TEST_F(TestPCTranslator, TestEncode) { + auto pcTranslator = std::make_shared( + pcs_features_, test_instruction_set_base_path_); + auto outputPath = pcTranslator->encode(test_publisher_input_path_); + EXPECT_EQ(outputPath, ""); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp index 32a38ff3a..d4ea3fd57 100644 --- a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp +++ b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp @@ -13,7 +13,6 @@ #include #include "../../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" -#include "folly/Random.h" namespace pc_translator { class TestPCInstructionSet : public ::testing::Test { @@ -24,7 +23,7 @@ class TestPCInstructionSet : public ::testing::Test { void SetUp() override { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); - testInstructionSetPath_ = baseDir + "test_instruction_set.json"; + testInstructionSetPath_ = baseDir + "pc_instr_test_instruction_set.json"; } }; @@ -35,7 +34,7 @@ TEST_F(TestPCInstructionSet, TestStandardWorkflowTest) { auto groupByIds = 
pcInstructionSet->getGroupByIds(); auto filterConstraints = pcInstructionSet->getFilterConstraints(); EXPECT_EQ(groupByIds.size(), 2); - EXPECT_EQ(filterConstraints.size(), 4); + EXPECT_EQ(filterConstraints.size(), 3); EXPECT_EQ(filterConstraints[0].getName(), "gender"); EXPECT_EQ(filterConstraints[0].getType(), "EQ"); EXPECT_EQ(filterConstraints[0].getValue(), 0); diff --git a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json similarity index 89% rename from fbpcs/pc_translator/tests/input_processing/test_instruction_set.json rename to fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json index 7a0008b19..7fa183d2f 100644 --- a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json +++ b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json @@ -30,10 +30,6 @@ { "constraint_type": "EQ", "value": "0" - }, - { - "constraint_type": "EQ", - "value": "1" } ] }, diff --git a/fbpcs/pc_translator/tests/publisher_unittest.csv b/fbpcs/pc_translator/tests/publisher_unittest.csv new file mode 100644 index 000000000..691ecff30 --- /dev/null +++ b/fbpcs/pc_translator/tests/publisher_unittest.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp, age, gender +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430, 25, 0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401, 26, 1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0, 44, 0 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0, 23, 0 +a87ff679a2f3e71d9181a67b7542122c,0,0,0, 25, 0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461, 24, 1 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052, 25, 1 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831, 26, 0 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530, 50, 0 +45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972, 25, 1 +d3d9446802a44259755d38e6d163e820,0,0,0, 25, 0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0, 25, 0