diff --git a/metasyn/__main__.py b/metasyn/__main__.py index 128df947..e5bdf78c 100644 --- a/metasyn/__main__.py +++ b/metasyn/__main__.py @@ -4,12 +4,15 @@ import pathlib import pickle import sys +from configparser import ConfigParser try: # Python < 3.10 (backport) from importlib_metadata import entry_points, version except ImportError: from importlib.metadata import entry_points, version # type: ignore [assignment] +import polars as pl + from metasyn import MetaFrame from metasyn.validation import create_schema @@ -19,7 +22,8 @@ Usage: metasyn [subcommand] [options] Available subcommands: - synthesize - generate synthetic data from a .json file + create-meta - generate a GMF (.json) file. + synthesize - generate synthetic data from a GMF (.json) file jsonschema - generate json schema from distribution providers Program information: @@ -46,11 +50,72 @@ def main() -> None: elif subcommand == "schema": schema() + elif subcommand == "create-meta": + create_metadata() + else: print(f"Invalid subcommand ({subcommand}). For help see metasyn --help") sys.exit(1) +def _parse_config(config_fp): + config = ConfigParser() + config.read(config_fp) + spec = {} + for section in config.sections(): + if section.startswith("var."): + new_dict = {} + for key, val in dict(config[section]).items(): + try: + new_dict[key] = config.getboolean(section, key) + except ValueError: + pass + try: + new_dict[key] = config.getfloat(section, key) + except ValueError: + pass + try: + new_dict[key] = config.getint(section, key) + except ValueError: + pass + if key not in new_dict: + new_dict[key] = val + spec[section[4:]] = new_dict + return spec + + +def create_metadata(): + """Program to create and export metadata from a DataFrame to a GMF file (.json).""" + parser = argparse.ArgumentParser( + prog="metasyn create-meta", + description="Create a Generative Metadata Format file from a CSV file.", + ) + parser.add_argument( + "input", + help="input file; a CSV file that you want to synthesize later.", + type=pathlib.Path, + ) + parser.add_argument( + "output", + help="output file: .json", + type=pathlib.Path, + ) + parser.add_argument( + "--config", + help="Configuration file to specify distribution behavior.", + type=pathlib.Path, + default=None, + ) + args, _ = parser.parse_known_args() + if args.config is not None: + spec = _parse_config(args.config) + else: + spec = {} + data_frame = pl.read_csv(args.input, try_parse_dates=True) + meta_frame = MetaFrame.fit_dataframe(data_frame, spec=spec) + meta_frame.export(args.output) + + def synthesize() -> None: """Program to generate synthetic data.""" parser = argparse.ArgumentParser(