Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logic to write output with encoded Ids. #2286

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions fbpcs/pc_translator/PCTranslator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "fbpcs/pc_translator/PCTranslator.h"
#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h"

#include <fbpcf/common/FunctionalUtil.h>
#include <fbpcf/io/api/FileIOWrappers.h>
#include <fbpcf/mpc_std_lib/oram/encoder/IFilter.h>
#include <fbpcf/mpc_std_lib/oram/encoder/IOramEncoder.h>
#include <fbpcf/mpc_std_lib/oram/encoder/OramEncoder.h>
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <set>
#include <stdexcept>
#include <string>
#include "fbpcs/emp_games/common/Csv.h"
#include "folly/String.h"

namespace pc_translator {

std::string PCTranslator::encode(const std::string& inputDatasetPath) {
auto validInstructionSetNames =
PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_);
auto pcInstructionSets =
PCTranslator::retrieveInstructionSets(validInstructionSetNames);
if (pcInstructionSets.empty()) {
// No instruction set found. return the input dataset path.
return inputDatasetPath;
}
return PCTranslator::transformDataset(
inputDatasetPath, pcInstructionSets.front());
}

/*
 * Decoding is not implemented: every call throws
 * std::runtime_error("Unimplemented"). The argument is ignored.
 */
std::string PCTranslator::decode(
    const std::string& /* aggregatedOutputDataset */) {
  const std::runtime_error notImplemented{"Unimplemented"};
  throw notImplemented;
}

std::vector<std::shared_ptr<PCInstructionSet>>
PCTranslator::retrieveInstructionSets(
std::vector<std::string>& instructionSetNames) {
std::vector<std::shared_ptr<PCInstructionSet>> pcInstructionSets;
for (auto instructionSetName : instructionSetNames) {
instructionSetName.erase(
remove(instructionSetName.begin(), instructionSetName.end(), '\''),
instructionSetName.end());
instructionSetName.erase(
remove(instructionSetName.begin(), instructionSetName.end(), ' '),
instructionSetName.end());
auto file_path = instructionSetBasePath_ + instructionSetName + ".json";
auto contents = fbpcf::io::FileIOWrappers::readFile(file_path);
pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents));
}
return pcInstructionSets;
}

/*
 * Extracts the instruction set names from a comma-separated feature flag
 * string.
 *
 * The flags are split on ',' into a std::set, so duplicates collapse and the
 * result is in sorted order. A flag is kept when it contains the substring
 * "pc_instr".
 */
std::vector<std::string> PCTranslator::retrieveInstructionSetNamesForRun(
    const std::string& pcsFeatures) {
  std::set<std::string> featureFlags;
  // The trailing `true` is folly's ignoreEmpty switch (empty tokens dropped).
  folly::splitTo<std::string>(
      ',',
      pcsFeatures,
      std::inserter(featureFlags, featureFlags.begin()),
      true);

  std::vector<std::string> instructionSetNames;
  for (const std::string& flag : featureFlags) {
    if (flag.find("pc_instr") == std::string::npos) {
      continue;
    }
    instructionSetNames.push_back(flag);
  }

  return instructionSetNames;
}

std::string PCTranslator::transformDataset(
const std::string& inputDatasetPath,
std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet) {
// Parse the input CSV
auto lineNo = 0;
std::vector<std::vector<uint32_t>> inputColums;
std::vector<std::string> outputHeader;
std::vector<std::vector<std::string>> outputContent;
private_measurement::csv::readCsv(
inputDatasetPath,
[&](const std::vector<std::string>& header,
const std::vector<std::string>& parts) {
std::vector<uint32_t> inputColumnPerRow;
std::string column;
std::uint32_t value;
bool found = false;
std::vector<std::string> outputContentPerRow;
for (std::vector<std::string>::size_type i = 0; i < header.size();
++i) {
column = header[i];
value = std::atoi(parts[i].c_str());
found =
(std::find(
pcInstructionSet->getGroupByIds().begin(),
pcInstructionSet->getGroupByIds().end(),
column) != pcInstructionSet->getGroupByIds().end());
if (found) {
inputColumnPerRow.push_back(value);
} else {
if (lineNo == 0) {
outputHeader.push_back(header[i]);
}
outputContentPerRow.push_back(parts[i]);
}
}

inputColums.push_back(inputColumnPerRow);
outputContent.push_back(outputContentPerRow);
lineNo++;
});

auto filters = std::make_unique<
std::vector<std::unique_ptr<fbpcf::mpc_std_lib::oram::IFilter>>>(0);
std::unique_ptr<fbpcf::mpc_std_lib::oram::IOramEncoder> encoder =
std::make_unique<fbpcf::mpc_std_lib::oram::OramEncoder>(
std::move(filters));

auto encodedIndexes = encoder->generateORAMIndexes(inputColums);

auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1);
auto output_dataset_path = dir + "transformed_publisher_input.csv";

PCTranslator::putOutputData(
output_dataset_path, outputHeader, outputContent, encodedIndexes);
return output_dataset_path;
}

/*
 * Appends the encoded index of each row as a trailing `breakdown_id` column
 * and writes the resulting table to `output_dataset_path` as CSV.
 *
 * `outputHeader` and `outputContent` are modified in place: the header gains
 * the "breakdown_id" entry and row i gains the decimal text of
 * `encodedIndexes[i]`.
 *
 * Throws std::runtime_error when the number of rows differs from the number
 * of encoded indexes. The check runs before anything is modified, so a failed
 * call leaves both arguments untouched.
 */
void PCTranslator::putOutputData(
    const std::string& output_dataset_path,
    std::vector<std::string>& outputHeader,
    std::vector<std::vector<std::string>>& outputContent,
    const std::vector<uint32_t>& encodedIndexes) {
  if (outputContent.size() != encodedIndexes.size()) {
    throw std::runtime_error(
        "Encoded index vector size should match the input vector size.");
  }

  outputHeader.push_back("breakdown_id");

  for (std::vector<std::string>::size_type i = 0; i < encodedIndexes.size();
       ++i) {
    outputContent[i].push_back(std::to_string(encodedIndexes[i]));
  }

  private_measurement::csv::writeCsv(
      output_dataset_path, outputHeader, outputContent);
}

/*
 * Parses the JSON text in `instructionSet` and converts it into a
 * PCInstructionSet via PCInstructionSet::fromDynamic().
 *
 * The string is only read. It is passed to folly::parseJson as-is rather than
 * through std::move, because the caller keeps ownership of the referenced
 * string and parseJson takes a non-owning string view, so there is nothing to
 * move.
 */
std::shared_ptr<PCInstructionSet> PCTranslator::parseInstructionSet(
    std::string& instructionSet) {
  return std::make_shared<PCInstructionSet>(
      PCInstructionSet::fromDynamic(folly::parseJson(instructionSet)));
}
} // namespace pc_translator
66 changes: 66 additions & 0 deletions fbpcs/pc_translator/PCTranslator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h"

namespace pc_translator {

/*
 * This class contains functions required for PC Translator during actual run
 * i.e. retrieving the PC instruction sets, filtering the set per active GK for
 * run, encoding and decoding the dataset files input as per the instruction
 * set.
 */
class PCTranslator {
public:
/*
 * Creates a translator for the given feature flag string (a comma-separated
 * list). Instruction set files are looked up under the default base path.
 */
explicit PCTranslator(const std::string& pcsFeatures)
: pcsFeatures_(pcsFeatures) {}

/*
 * Same as above, but instruction set files are looked up under
 * `instructionSetBasePath` instead of the default base path.
 */
explicit PCTranslator(
const std::string& pcsFeatures,
const std::string& instructionSetBasePath)
: pcsFeatures_(pcsFeatures),
instructionSetBasePath_(instructionSetBasePath) {}

/*
 * Method to encode the dataset at the given path as per the instruction set
 * enabled for the run. Returns the path of the transformed dataset, or the
 * input path unchanged when no instruction set is enabled.
 */
std::string encode(const std::string& inputDataset);

/*
 * Method to decode final aggregated output with the encoded breakdown Ids as
 * the keys. This method will decode the breakdown Ids to original group Id
 * values and format the aggregated output as per the new keys. Output of this
 * method would be the path of the decoded aggregated output.
 *
 * Not implemented yet: the current definition throws std::runtime_error.
 */
std::string decode(const std::string& aggregatedOutputDataset);

private:
// Comma-separated feature flags the instruction set names are taken from.
std::string pcsFeatures_;
// Prefix prepended to "<name>.json" when locating an instruction set file.
std::string instructionSetBasePath_ =
"https://pc-translator.s3.us-west-2.amazonaws.com/";
// Reads and parses the instruction set file for each of the given names.
std::vector<std::shared_ptr<PCInstructionSet>> retrieveInstructionSets(
std::vector<std::string>& instructionSetNames);
// Returns the flags in `pcsFeatures` that contain "pc_instr".
std::vector<std::string> retrieveInstructionSetNamesForRun(
const std::string& pcsFeatures);
// Parses the JSON text of one instruction set.
std::shared_ptr<PCInstructionSet> parseInstructionSet(
std::string& instructionSet);
// Replaces the group-by columns of the CSV at `inputData` with an encoded
// breakdown id column and returns the path of the written CSV.
std::string transformDataset(
const std::string& inputData,
std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet);

// Appends a "breakdown_id" column holding `encodedIndexes` to the given
// header/rows (modified in place) and writes them to `output_dataset_path`.
void putOutputData(
const std::string& output_dataset_path,
std::vector<std::string>& outputHeader,
std::vector<std::vector<std::string>>& outputContent,
const std::vector<uint32_t>& encodedIndexes);
};

} // namespace pc_translator
33 changes: 33 additions & 0 deletions fbpcs/pc_translator/input_processing/FilterConstraint.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "fbpcs/pc_translator/input_processing/FilterConstraint.h"

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace pc_translator {
FilterConstraint::FilterConstraint(
const std::string& name,
const std::string& type,
int value)
: name_(name), type_(type), value_(value) {}

std::string FilterConstraint::getName() const {
return name_;
}

std::string FilterConstraint::getType() const {
return type_;
}

int FilterConstraint::getValue() const {
return value_;
}
} // namespace pc_translator
43 changes: 43 additions & 0 deletions fbpcs/pc_translator/input_processing/FilterConstraint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace pc_translator {

/*
 * Class to store each filter constraint included in the PC instruction set.
 */
class FilterConstraint {
public:
/*
 * Creates a constraint on field `name` with the given constraint type and
 * value.
 */
FilterConstraint(const std::string& name, const std::string& type, int value);

/*
 * Name of the filter constraint i.e. the field on which this filter is to be
 * applied.
 */
std::string getName() const;

/*
 * Constraint type i.e. LT, LTE, EQ, NEQ etc.
 */
std::string getType() const;

/*
 * Integer value stored for the constraint.
 */
int getValue() const;

private:
std::string name_;
std::string type_;
int value_;
};

} // namespace pc_translator
51 changes: 51 additions & 0 deletions fbpcs/pc_translator/input_processing/PCInstructionSet.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "fbpcs/pc_translator/input_processing/PCInstructionSet.h"

#include <folly/json.h>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace pc_translator {

const std::vector<std::string>& PCInstructionSet::getGroupByIds() const {
return groupByIds;
}

const std::vector<FilterConstraint>& PCInstructionSet::getFilterConstraints()
const {
return filterConstraints;
}

/*
 * Builds a PCInstructionSet from its parsed JSON representation.
 *
 * Fields read from `obj`:
 *   obj["aggregated_metrics"]["group_by"] - iterated; each element is
 *       converted with asString() and stored as a group-by id.
 *   obj["aggregated_metrics"]["filter"]   - iterated as key/value items; the
 *       key is the field name and the value is iterated as a list of entries,
 *       each providing "constraint_type" (string) and "value" (integer).
 *
 * All lookups go through const references, so nothing in `obj` is copied and
 * no key is ever inserted into it. folly signals a missing key or a value of
 * the wrong type by throwing; those exceptions propagate to the caller.
 */
PCInstructionSet PCInstructionSet::fromDynamic(const folly::dynamic& obj) {
  PCInstructionSet pcInstructionSet;
  const auto& aggregationConfig = obj["aggregated_metrics"];

  for (const auto& groupByField : aggregationConfig["group_by"]) {
    pcInstructionSet.groupByIds.push_back(groupByField.asString());
  }

  for (const auto& [key, constraints] : aggregationConfig["filter"].items()) {
    const std::string name = key.asString();
    for (const auto& constraint : constraints) {
      auto constraintType = constraint["constraint_type"].asString();
      // asInt() yields a 64-bit integer while FilterConstraint stores an int;
      // the narrowing is spelled out instead of happening implicitly.
      const auto constraintValue =
          static_cast<int>(constraint["value"].asInt());
      pcInstructionSet.filterConstraints.emplace_back(
          name, constraintType, constraintValue);
    }
  }

  return pcInstructionSet;
}

} // namespace pc_translator
Loading