Skip to content

Commit

Permalink
Merge pull request #23 from KatSteinke/19-fix-custom-basename
Browse files Browse the repository at this point in the history
Fix setting custom error profile for CAMISIM
  • Loading branch information
KatSteinke authored May 31, 2024
2 parents 831dec3 + 24fddd0 commit a8f5804
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 37 deletions.
49 changes: 32 additions & 17 deletions run_magician.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def make_demo_tempfile(tempfile: pathlib.Path) -> None:


def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT_PROFILE,
profile_base: Optional[str] = "", readlength: Optional[str] = "",
profile_base: Optional[str] = "", readlength: Optional[int] = None,
insert_size: Optional[int] = DEFAULT_INSERT, cluster_cmd: Optional[str] = "",
cores: Optional[int]=DEFAULT_CORES,
*snake_params, config_path: pathlib.Path = default_config_file) -> List[str]:
Expand All @@ -65,6 +65,12 @@ def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT
snake_params: parameters to pass to the Snakefile
config_path: path to the config file to use with Snakemake
Returns:
The command for running Snakemake with the desired parameters.
Raises:
ValueError: if arguments contain invalid characters, or if a value that isn't a positive int
was given for read length, insert size or number of cores
"""
# check all elements of the command
Expand All @@ -86,11 +92,18 @@ def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT
if cores <= 0:
raise ValueError(core_error)

if profile_type == "own" and not (profile_base and readlength):
raise ValueError("Both name of the custom error profile and read length of the error profile must be given when using own profiles.")
# we only need to check read length when it's relevant - check explicitly for "not None"
# so we can complain about read lengths <= 0 specifically
if profile_type == "own":
if not (profile_base and (readlength is not None)):
raise ValueError("Both name of the custom error profile and read length of the error"
" profile must be given when using own profiles.")
if readlength <= 0:
raise ValueError("Read length needs to be above 0.")

if (profile_base or readlength) and profile_type != "own":
raise ValueError("Name of the error profile and read length can only be specified when using own profiles.")
raise ValueError("Name of the error profile and read length can only be specified"
" when using own profiles.")

# if custom parameters haven't been given, set to False for later processing in Snakefile
if not profile_base:
Expand All @@ -106,21 +119,21 @@ def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT
snake_config = yaml.safe_load(config_file)

# get basic command
snakemake_cmd = ["snakemake", target, "-s", snake_path,
"--config", 'profile_type="{}"'.format(profile_type),
'profile_name="{}"'.format(profile_base), 'readlength="{}"'.format(readlength),
snakemake_cmd = ["snakemake", target, "-s", snake_path]
# add cluster commands if needed
if cluster_cmd:
snakemake_cmd += ["--cluster", cluster_cmd]
snakemake_cmd += ["--config", 'profile_type="{}"'.format(profile_type),
'insert_size={}'.format(insert_size),
'samples_file={}'.format(input_file),
'--use-conda', '--conda-frontend', snake_config["conda_frontend"],
'samples_file={}'.format(input_file)]
if profile_type == "own":
snakemake_cmd += ['profile_name="{}"'.format(profile_base),
'readlength={}'.format(readlength)]
snakemake_cmd += ['--use-conda', '--conda-frontend', snake_config["conda_frontend"],
"--configfile", str(config_path),
"--cores", str(cores),
*snake_params]

# if cluster mode is specified:
if cluster_cmd:
snakemake_cmd.insert(4, "--cluster")
snakemake_cmd.insert(5, cluster_cmd)

return snakemake_cmd


Expand All @@ -144,7 +157,8 @@ def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT
help="""Base name of custom error profile, if given (name of files without '[1/2].txt');\
required with 'own' error profile""", default="")
parser.add_argument("--profile_readlength", action="store",
help="Read length of custom error profile; required with 'own' error profile", default="")
help="Read length of custom error profile; "
"required with 'own' error profile", default=None)
parser.add_argument("--insert_size", action="store", type=int, default=DEFAULT_INSERT,
help=f"Mean insert size for read simulation (default: {DEFAULT_INSERT})")
parser.add_argument("--cluster", action="store", default="",
Expand All @@ -167,8 +181,9 @@ def get_snake_cmd(input_file, target: str, profile_type: Optional[str] = DEFAULT
# make a copy of the input file with absolute paths so it can run anywhere
local_tempfile = pathlib.Path.cwd() / "tmp_demo_sample_distributions.tsv"
make_demo_tempfile(local_tempfile)
snake_command = get_snake_cmd(local_tempfile, "all_bin_summaries", DEFAULT_PROFILE, "",
"", DEFAULT_INSERT, "", DEFAULT_CORES)
snake_command = get_snake_cmd(local_tempfile, "all_bin_summaries", DEFAULT_PROFILE,
insert_size = DEFAULT_INSERT, cluster_cmd = "",
cores = DEFAULT_CORES)
snake_run = subprocess.run(snake_command, check=True)
sys.exit(snake_run.returncode)
if run_demo == "n":
Expand Down
27 changes: 12 additions & 15 deletions snakefiles/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,10 @@ CAMISIM_DIR = config["camisim_path"]
MAGICIAN_DIR = pathlib.Path(workflow.basedir).parent

# if config entries do not exist, set defaults
if not "profile_type" in config:
config["profile_type"] = "mbarc"
if not "profile_name" in config:
config["profile_name"] = "False"
if not "readlength" in config:
config["readlength"] = "False"
if not "insert_size" in config:
config["insert_size"] = 270

PROFILE_TYPE = config.get("profile_type", "mbarc")
PROFILE_NAME = config.get("profile_name", False)
READLENGTH = config.get("readlength", False)
INSERT_SIZE = config.get("insert_size", 270)

rule complete_qc:
input:
Expand Down Expand Up @@ -121,12 +116,14 @@ rule camisim_configfiles:
camisim_dir = CAMISIM_DIR,
#coverage = 20,
samplesize = 2.5,
profile_type = config["profile_type"],
profile_base = "" if config["profile_name"] == "False" else "--profile_basename '{}'".format(pathlib.Path(config["profile_name"]).stem),
profile_readlength = "" if config["readlength"] == "False" else "--profile_readlength {}".format(config["readlength"]),
insert_size = config["insert_size"],
errorprofile_dir = str(pathlib.Path(CAMISIM_DIR) / "tools" / "art_illumina-2.3.6" / "profiles") if config["profile_name"] == "False" \
else pathlib.Path(config["profile_name"]).parent
profile_type = PROFILE_TYPE,
profile_base = "" if not PROFILE_NAME \
else "--profile_basename '{}'".format(pathlib.Path(PROFILE_NAME).stem),
profile_readlength = "" if not READLENGTH \
else "--profile_readlength {}".format(READLENGTH),
insert_size = INSERT_SIZE,
errorprofile_dir = str(pathlib.Path(CAMISIM_DIR) / "tools" / "art_illumina-2.3.6" / "profiles") if not PROFILE_NAME \
else pathlib.Path(PROFILE_NAME).parent
output:
camisim_configfile = 'camisim_config_{sample}.ini'
#conda: pathlib.Path(workflow.current_basedir).parent / "requirements.yml"
Expand Down
40 changes: 35 additions & 5 deletions test/test_run_magician.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TestRunMagician(unittest.TestCase):
snake_path = pathlib.Path(__file__).resolve().parent.parent / "snakefiles" / "Snakefile"
profile_type = "mbarc"
profile_base = ""
readlength = ""
readlength = None
insert_size = 270
cluster_cmd = ""
cores = 6
Expand All @@ -52,7 +52,6 @@ def test_default_command_success(self):
"""Run Snakemake with default settings."""
expected_command = ["snakemake", "all_bin_summaries", "-s", self.snake_path,
"--config", 'profile_type="mbarc"',
'profile_name="False"', 'readlength="False"',
'insert_size=270', f"samples_file={self.distributions_file}",
"--use-conda",
"--conda-frontend", "conda",
Expand All @@ -71,7 +70,6 @@ def test_use_config_success(self):

expected_command = ["snakemake", "all_bin_summaries", "-s", self.snake_path,
"--config", 'profile_type="mbarc"',
'profile_name="False"', 'readlength="False"',
'insert_size=270', f"samples_file={self.distributions_file}",
"--use-conda",
"--conda-frontend", "mamba",
Expand All @@ -92,7 +90,6 @@ def test_run_cluster_command(self):
expected_command = ["snakemake", "all_bin_summaries", "-s", self.snake_path,
"--cluster", "qsub -pe threaded {threads}",
"--config", 'profile_type="mbarc"',
'profile_name="False"', 'readlength="False"',
'insert_size=270', f"samples_file={self.distributions_file}",
"--use-conda",
"--conda-frontend", "conda",
Expand All @@ -111,7 +108,6 @@ def test_change_settings(self):
"""Use non-default settings for various parameters."""
expected_command = ["snakemake", "all_bin_summaries", "-s", self.snake_path,
"--config", 'profile_type="mbarc"',
'profile_name="False"', 'readlength="False"',
'insert_size=500', f"samples_file={self.distributions_file}",
"--use-conda",
"--conda-frontend", "conda",
Expand All @@ -126,6 +122,40 @@ def test_change_settings(self):
*snake_flags)
assert test_command == expected_command

def test_set_own_profile(self):
    """Build the Snakemake command for a custom ("own") error profile.

    With profile type "own", the command is expected to carry the profile name
    and the read length as extra --config entries after the samples file.
    """
    # inputs for the call: "own" is the profile *type*, "test_R" the profile base name
    dry_run_flags = ["-n"]
    custom_type = "own"
    custom_base = "test_R"
    custom_readlength = 150

    wanted_command = [
        "snakemake", "all_bin_summaries", "-s", self.snake_path,
        "--config", 'profile_type="own"',
        'insert_size=270', f"samples_file={self.distributions_file}",
        'profile_name="test_R"', "readlength=150",
        "--use-conda",
        "--conda-frontend", "conda",
        "--configfile", str(run_magician.default_config_file),
        "--cores", "6", "-n",
    ]

    built_command = run_magician.get_snake_cmd(
        self.distributions_file,
        "all_bin_summaries",
        custom_type,
        custom_base,
        custom_readlength,
        self.insert_size,
        self.cluster_cmd,
        self.cores,
        *dry_run_flags,
    )
    assert built_command == wanted_command

def test_bad_readlength(self):
    """Check that a read length of 0 with an "own" profile raises ValueError."""
    # a complete custom-profile request whose read length is 0
    call_args = (
        self.distributions_file,
        "all_bin_summaries",
        "own",        # profile type
        "test_R",     # profile base name
        0,            # read length under test
        self.insert_size,
        self.cluster_cmd,
        self.cores,
        "-n",         # extra Snakemake flag passed through
    )
    with self.assertRaisesRegex(ValueError, r"Read length needs to be above 0."):
        run_magician.get_snake_cmd(*call_args)


def test_bad_insertsize(self):
"""Catch bad insert size."""
snake_flags = ["-n"]
Expand Down

0 comments on commit a8f5804

Please sign in to comment.