From 1e4c0d70f4697fcc5dd4f6f91bb2d5e4b0bd1b4c Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 13:22:35 -0700 Subject: [PATCH 1/7] tests pass w "read too short" as fate --- CHANGELOG.rst | 4 + dms_variants/__init__.py | 2 +- dms_variants/illuminabarcodeparser.py | 128 +++++----- notebooks/parsebarcodes_sim_data.ipynb | 229 ++++++++++++++++-- tests/count_codonvariant_files/fates.csv | 3 + tests/illuminabarcodeparser_toy_example.ipynb | 155 +++++++++--- tests/test_count_codonvariants.py | 1 + 7 files changed, 408 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 24ceefe..13f6241 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file. The format is based on `Keep a Changelog `_. +1.6.0 +----- +- Added ability to parse second upstream / downstream region in ``IlluminaBarcodeParser`` by adding ``upstream2`` and ``downstream2`` parameters. Also modified ``IlluminaBarcodeParser`` so that reads will only be parsed if they are long enough to fully cover the region containing the barcodes and specified upstream / downstream sequences. Based on docs, this is how it was supposed to function before but did not. Additionally, this adds another row ("read too short") to the fates from the barcode parser. + 1.5.0 ----- - Remove use of deprecated ``scipy`` functions like ``flip`` to use ``numpy`` alternatives instead (fixes [this issue](https://github.com/jbloomlab/dms_variants/issues/86)). 
diff --git a/dms_variants/__init__.py b/dms_variants/__init__.py index db24f4b..1538f11 100644 --- a/dms_variants/__init__.py +++ b/dms_variants/__init__.py @@ -10,5 +10,5 @@ __author__ = "`the Bloom lab `_" __email__ = "jbloom@fredhutch.org" -__version__ = "1.5.0" +__version__ = "1.6.0" __url__ = "https://github.com/jbloomlab/dms_variants" diff --git a/dms_variants/illuminabarcodeparser.py b/dms_variants/illuminabarcodeparser.py index 3eef2f7..b6e194f 100644 --- a/dms_variants/illuminabarcodeparser.py +++ b/dms_variants/illuminabarcodeparser.py @@ -30,7 +30,7 @@ class IlluminaBarcodeParser: ---- Barcodes should be read by R1 and optionally R2. Expected arrangement is - 5'-[R2_start]-upstream-barcode-downstream-[R1_start]-3' + 5'-[R2_start]-upstream2-upstream-barcode-downstream-downstream2-[R1_start]-3' R1 anneals downstream of barcode and reads backwards. If R2 is used, it anneals upstream of barcode and reads forward. There can be sequences @@ -72,12 +72,20 @@ class IlluminaBarcodeParser: Length of barcodes. upstream : str Sequence upstream of barcode. + upstream2 : str + Second sequence upstream of barcode. downstream : str Sequence downstream of barcode. + downstream2 : str + Second sequence downstream of barcode upstream_mismatch : int Max number of mismatches allowed in `upstream`. + upstream2_mismatch : int + Max number of mismatches allowed in `upstream2`. downstream_mismatch : int Max number of mismatches allowed in `downstream`. + downstream2_mismatch : int + Max number of mismatches allowed in `downstream2`. valid_barcodes : None or set If not `None`, set of barcodes to retain. 
bc_orientation : {'R1', 'R2'} @@ -101,9 +109,13 @@ def __init__( *, bclen=None, upstream="", + upstream2="", downstream="", + downstream2="", upstream_mismatch=0, + upstream2_mismatch=0, downstream_mismatch=0, + downstream2_mismatch=0, valid_barcodes=None, bc_orientation="R1", minq=20, @@ -112,16 +124,20 @@ def __init__( ): """See main class doc string.""" self.bclen = bclen - if regex.match(f"^[{self.VALID_NTS}]*$", upstream): - self.upstream = upstream - else: - raise ValueError(f"invalid chars in upstream {upstream}") - if regex.match(f"^[{self.VALID_NTS}]*$", downstream): - self.downstream = downstream - else: - raise ValueError(f"invalid chars in downstream {downstream}") + for param_name, param_val in [ + ("upstream", upstream), + ("downstream", downstream), + ("upstream2", upstream2), + ("downstream2", downstream2), + ]: + if regex.match(f"^[{self.VALID_NTS}]*$", param_val): + setattr(self, param_name, param_val) + else: + raise ValueError(f"invalid chars in {param_name} {param_val}") self.upstream_mismatch = upstream_mismatch self.downstream_mismatch = downstream_mismatch + self.upstream2_mismatch = upstream2_mismatch + self.downstream2_mismatch = downstream2_mismatch self.valid_barcodes = valid_barcodes if self.valid_barcodes is not None: self.valid_barcodes = set(self.valid_barcodes) @@ -142,13 +158,36 @@ def __init__( self.list_all_valid_barcodes = list_all_valid_barcodes # specify information about R1 / R2 matches - self._bcend = { - "R1": self.bclen + len(self.downstream), - "R2": self.bclen + len(self.upstream), - } self._rcdownstream = reverse_complement(self.downstream) self._rcupstream = reverse_complement(self.upstream) - self._matches = {"R1": {}, "R2": {}} # match objects by read length + self._rcdownstream2 = reverse_complement(self.downstream2) + self._rcupstream2 = reverse_complement(self.upstream2) + + # build the regex read matches + self._matchers = { + "R1": regex.compile( + f"({self._rcdownstream2})" + + 
f"{{s<={self.downstream2_mismatch}}}" + + f"({self._rcdownstream})" + + f"{{s<={self.downstream_mismatch}}}" + + f"(?P[ACTG]{{{self.bclen}}})" + + f"({self._rcupstream})" + + f"{{s<={self.upstream_mismatch}}}" + + f"({self._rcupstream2})" + + f"{{s<={self.upstream2_mismatch}}}" + ), + "R2": regex.compile( + f"({self.upstream2})" + + f"{{s<={self.upstream2_mismatch}}}" + + f"^({self.upstream})" + + f"{{s<={self.upstream_mismatch}}}" + + f"(?P[ACTG]{{{self.bclen}}})" + + f"({self.downstream})" + + f"{{s<={self.downstream_mismatch}}}" + + f"({self.downstream2})" + + f"{{s<={self.downstream2_mismatch}}}" + ), + } def parse(self, r1files, *, r2files=None, add_cols=None): """Parse barcodes from files. @@ -210,21 +249,28 @@ def parse(self, r1files, *, r2files=None, add_cols=None): "low quality barcode": 0, "invalid barcode": 0, "valid barcode": 0, + "read too short": 0, } if not r1only: fates["R1 / R2 disagree"] = 0 - # max length of interest for reads - max_len = self.bclen + len(self.upstream) + len(self.downstream) + # min length of interest for reads + minlen = ( + self.bclen + + len(self.upstream) + + len(self.downstream) + + len(self.upstream2) + + len(self.downstream2) + ) for filetup in zip(*fileslist): if r1only: assert len(filetup) == 1 - iterator = iterate_fastq(filetup[0], check_pair=1, trim=max_len) + iterator = iterate_fastq(filetup[0], check_pair=1, trim=minlen) else: assert len(filetup) == 2, f"{filetup}\n{fileslist}" iterator = iterate_fastq_pair( - filetup[0], filetup[1], r1trim=max_len, r2trim=max_len + filetup[0], filetup[1], r1trim=minlen, r2trim=minlen ) for entry in iterator: @@ -242,44 +288,18 @@ def parse(self, r1files, *, r2files=None, add_cols=None): fates["failed chastity filter"] += 1 continue - matches = {} - for read, r in zip(reads, readlist): - rlen = len(r) + if any(len(r) < minlen for r in readlist): + fates["read too short"] += 1 + continue - # get or build matcher for read of this length - len_past_bc = rlen - self._bcend[read] - if 
len_past_bc < 0: - raise ValueError(f"{read} too short: {rlen}") - elif rlen in self._matches[read]: - matcher = self._matches[read][rlen] - else: - if read == "R1": - match_str = ( - f"^({self._rcdownstream})" - f"{{s<={self.downstream_mismatch}}}" - f"(?P[ACTG]{{{self.bclen}}})" - f"({self._rcupstream[: len_past_bc]})" - f"{{s<={self.upstream_mismatch}}}" - ) - else: - assert read == "R2" - match_str = ( - f"^({self.upstream})" - f"{{s<={self.upstream_mismatch}}}" - f"(?P[ACTG]{{{self.bclen}}})" - f"({self.downstream[: len_past_bc]})" - f"{{s<={self.downstream_mismatch}}}" - ) - matcher = regex.compile(match_str, flags=regex.BESTMATCH) - self._matches[read][rlen] = matcher - - m = matcher.match(r) - if m: - matches[read] = m - else: - break + assert all(len(r) == minlen for r in readlist) + + matches = { + read: self._matchers[read].fullmatch(r) + for (read, r) in zip(reads, readlist) + } - if len(matches) == len(reads): + if all(m is not None for m in matches.values()): bc = {} bc_q = {} for read, q in zip(reads, qlist): diff --git a/notebooks/parsebarcodes_sim_data.ipynb b/notebooks/parsebarcodes_sim_data.ipynb index e7cc757..8367500 100644 --- a/notebooks/parsebarcodes_sim_data.ipynb +++ b/notebooks/parsebarcodes_sim_data.ipynb @@ -26,6 +26,13 @@ "classes": [], "id": "", "n": "1" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:40.815432Z", + "iopub.status.busy": "2024-04-10T20:05:40.814366Z", + "iopub.status.idle": "2024-04-10T20:05:44.954972Z", + "shell.execute_reply": "2024-04-10T20:05:44.954174Z", + "shell.execute_reply.started": "2024-04-10T20:05:40.815379Z" } }, "outputs": [], @@ -61,6 +68,13 @@ "classes": [], "id": "", "n": "3" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:44.958831Z", + "iopub.status.busy": "2024-04-10T20:05:44.958529Z", + "iopub.status.idle": "2024-04-10T20:05:44.961978Z", + "shell.execute_reply": "2024-04-10T20:05:44.961384Z", + "shell.execute_reply.started": "2024-04-10T20:05:44.958804Z" } }, 
"outputs": [], @@ -83,6 +97,13 @@ "classes": [], "id": "", "n": "5" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:44.965486Z", + "iopub.status.busy": "2024-04-10T20:05:44.965154Z", + "iopub.status.idle": "2024-04-10T20:05:44.968603Z", + "shell.execute_reply": "2024-04-10T20:05:44.968002Z", + "shell.execute_reply.started": "2024-04-10T20:05:44.965457Z" } }, "outputs": [], @@ -100,7 +121,15 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:44.972410Z", + "iopub.status.busy": "2024-04-10T20:05:44.971869Z", + "iopub.status.idle": "2024-04-10T20:05:44.977782Z", + "shell.execute_reply": "2024-04-10T20:05:44.977077Z", + "shell.execute_reply.started": "2024-04-10T20:05:44.972378Z" + } + }, "outputs": [], "source": [ "theme_set(dms_variants.plotnine_themes.theme_graygrid())" @@ -124,6 +153,13 @@ "classes": [], "id": "", "n": "6" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:44.981199Z", + "iopub.status.busy": "2024-04-10T20:05:44.980887Z", + "iopub.status.idle": "2024-04-10T20:05:45.004497Z", + "shell.execute_reply": "2024-04-10T20:05:45.003961Z", + "shell.execute_reply.started": "2024-04-10T20:05:44.981171Z" } }, "outputs": [], @@ -153,6 +189,13 @@ "classes": [], "id": "", "n": "8" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.007722Z", + "iopub.status.busy": "2024-04-10T20:05:45.007435Z", + "iopub.status.idle": "2024-04-10T20:05:45.022236Z", + "shell.execute_reply": "2024-04-10T20:05:45.021685Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.007698Z" } }, "outputs": [ @@ -275,7 +318,15 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.025335Z", + "iopub.status.busy": "2024-04-10T20:05:45.025036Z", + "iopub.status.idle": "2024-04-10T20:05:45.029102Z", + "shell.execute_reply": "2024-04-10T20:05:45.028494Z", + 
"shell.execute_reply.started": "2024-04-10T20:05:45.025309Z" + } + }, "outputs": [ { "name": "stdout", @@ -301,7 +352,15 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.032391Z", + "iopub.status.busy": "2024-04-10T20:05:45.032082Z", + "iopub.status.idle": "2024-04-10T20:05:45.042341Z", + "shell.execute_reply": "2024-04-10T20:05:45.041684Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.032363Z" + } + }, "outputs": [ { "data": { @@ -384,7 +443,15 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.045607Z", + "iopub.status.busy": "2024-04-10T20:05:45.045302Z", + "iopub.status.idle": "2024-04-10T20:05:45.050103Z", + "shell.execute_reply": "2024-04-10T20:05:45.049529Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.045581Z" + } + }, "outputs": [], "source": [ "barcode_seqs = []\n", @@ -404,7 +471,15 @@ { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.053294Z", + "iopub.status.busy": "2024-04-10T20:05:45.052987Z", + "iopub.status.idle": "2024-04-10T20:05:45.057238Z", + "shell.execute_reply": "2024-04-10T20:05:45.056639Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.053267Z" + } + }, "outputs": [], "source": [ "n_invalid = 3\n", @@ -423,7 +498,15 @@ { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.060813Z", + "iopub.status.busy": "2024-04-10T20:05:45.060433Z", + "iopub.status.idle": "2024-04-10T20:05:45.064295Z", + "shell.execute_reply": "2024-04-10T20:05:45.063634Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.060785Z" + } + }, "outputs": [], "source": [ "n_unparseable = 2\n", @@ -442,7 +525,15 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + 
"metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.067729Z", + "iopub.status.busy": "2024-04-10T20:05:45.067406Z", + "iopub.status.idle": "2024-04-10T20:05:45.070818Z", + "shell.execute_reply": "2024-04-10T20:05:45.070140Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.067700Z" + } + }, "outputs": [], "source": [ "minq = 20\n", @@ -459,7 +550,15 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.077081Z", + "iopub.status.busy": "2024-04-10T20:05:45.076734Z", + "iopub.status.idle": "2024-04-10T20:05:45.080445Z", + "shell.execute_reply": "2024-04-10T20:05:45.079808Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.077053Z" + } + }, "outputs": [], "source": [ "barcode_seqs = [(seq, minq_char * len(seq)) for seq in barcode_seqs]" @@ -475,7 +574,15 @@ { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.081577Z", + "iopub.status.busy": "2024-04-10T20:05:45.081245Z", + "iopub.status.idle": "2024-04-10T20:05:45.085347Z", + "shell.execute_reply": "2024-04-10T20:05:45.084804Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.081550Z" + } + }, "outputs": [], "source": [ "lowq_char = chr(minq + 33 - 1) # low-quality Q-score\n", @@ -495,7 +602,15 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.086445Z", + "iopub.status.busy": "2024-04-10T20:05:45.086135Z", + "iopub.status.idle": "2024-04-10T20:05:45.090674Z", + "shell.execute_reply": "2024-04-10T20:05:45.089984Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.086419Z" + } + }, "outputs": [ { "name": "stdout", @@ -576,6 +691,13 @@ "classes": [], "id": "", "n": "13" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.091953Z", + "iopub.status.busy": "2024-04-10T20:05:45.091602Z", 
+ "iopub.status.idle": "2024-04-10T20:05:45.098706Z", + "shell.execute_reply": "2024-04-10T20:05:45.097642Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.091921Z" } }, "outputs": [], @@ -602,7 +724,15 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.100991Z", + "iopub.status.busy": "2024-04-10T20:05:45.100290Z", + "iopub.status.idle": "2024-04-10T20:05:45.113899Z", + "shell.execute_reply": "2024-04-10T20:05:45.113239Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.100933Z" + } + }, "outputs": [], "source": [ "with tempfile.NamedTemporaryFile(\"r+\") as fastq:\n", @@ -624,7 +754,15 @@ { "cell_type": "code", "execution_count": 18, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.115209Z", + "iopub.status.busy": "2024-04-10T20:05:45.114844Z", + "iopub.status.idle": "2024-04-10T20:05:45.123314Z", + "shell.execute_reply": "2024-04-10T20:05:45.122677Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.115178Z" + } + }, "outputs": [ { "data": { @@ -705,7 +843,15 @@ { "cell_type": "code", "execution_count": 19, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.124438Z", + "iopub.status.busy": "2024-04-10T20:05:45.124130Z", + "iopub.status.idle": "2024-04-10T20:05:45.132350Z", + "shell.execute_reply": "2024-04-10T20:05:45.131799Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.124412Z" + } + }, "outputs": [ { "data": { @@ -770,6 +916,13 @@ " lib_1\n", " sample_1\n", " \n", + " \n", + " 5\n", + " read too short\n", + " 0\n", + " lib_1\n", + " sample_1\n", + " \n", " \n", "\n", "" @@ -780,7 +933,8 @@ "1 invalid barcode 3 lib_1 sample_1\n", "2 unparseable barcode 2 lib_1 sample_1\n", "3 low quality barcode 1 lib_1 sample_1\n", - "4 failed chastity filter 0 lib_1 sample_1" + "4 failed chastity filter 0 lib_1 sample_1\n", + "5 read too short 0 lib_1 sample_1" 
] }, "execution_count": 19, @@ -803,7 +957,15 @@ { "cell_type": "code", "execution_count": 20, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.133441Z", + "iopub.status.busy": "2024-04-10T20:05:45.133143Z", + "iopub.status.idle": "2024-04-10T20:05:45.155925Z", + "shell.execute_reply": "2024-04-10T20:05:45.155393Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.133416Z" + } + }, "outputs": [ { "name": "stdout", @@ -929,6 +1091,13 @@ "classes": [], "id": "", "n": "14" + }, + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.157017Z", + "iopub.status.busy": "2024-04-10T20:05:45.156703Z", + "iopub.status.idle": "2024-04-10T20:05:45.163082Z", + "shell.execute_reply": "2024-04-10T20:05:45.162517Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.156993Z" } }, "outputs": [], @@ -955,7 +1124,15 @@ { "cell_type": "code", "execution_count": 22, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.164156Z", + "iopub.status.busy": "2024-04-10T20:05:45.163844Z", + "iopub.status.idle": "2024-04-10T20:05:45.171713Z", + "shell.execute_reply": "2024-04-10T20:05:45.170643Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.164133Z" + } + }, "outputs": [], "source": [ "pd.testing.assert_frame_equal(\n", @@ -967,6 +1144,7 @@ " (\"unparseable barcode\", n_unparseable),\n", " (\"low quality barcode\", n_low_quality),\n", " (\"failed chastity filter\", 0),\n", + " (\"read too short\", 0),\n", " ],\n", " columns=[\"fate\", \"count\"],\n", " ),\n", @@ -984,17 +1162,28 @@ { "cell_type": "code", "execution_count": 23, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T20:05:45.173799Z", + "iopub.status.busy": "2024-04-10T20:05:45.173196Z", + "iopub.status.idle": "2024-04-10T20:05:49.443413Z", + "shell.execute_reply": "2024-04-10T20:05:49.442570Z", + "shell.execute_reply.started": "2024-04-10T20:05:45.173754Z" + } + }, "outputs": [ { 
"data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] }, "metadata": { - "needs_background": "light" + "image/png": { + "height": 308, + "width": 280 + } }, "output_type": "display_data" } @@ -1050,7 +1239,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.11.7" }, "toc": { "nav_menu": {}, diff --git a/tests/count_codonvariant_files/fates.csv b/tests/count_codonvariant_files/fates.csv index 501ead0..5b030ee 100644 --- a/tests/count_codonvariant_files/fates.csv +++ b/tests/count_codonvariant_files/fates.csv @@ -3,14 +3,17 @@ invalid barcode,1739,library-1,plasmid,run-1 low quality barcode,480,library-1,plasmid,run-1 unparseable barcode,281,library-1,plasmid,run-1 failed chastity filter,0,library-1,plasmid,run-1 +read too short,0,library-1,plasmid,run-1 valid barcode,0,library-1,plasmid,run-1 invalid barcode,1761,library-1,uninduced,run-1 low quality barcode,374,library-1,uninduced,run-1 unparseable barcode,362,library-1,uninduced,run-1 valid barcode,3,library-1,uninduced,run-1 failed chastity filter,0,library-1,uninduced,run-1 +read too short,0,library-1,uninduced,run-1 invalid barcode,1844,library-2,plasmid,run-1 low quality barcode,435,library-2,plasmid,run-1 unparseable barcode,220,library-2,plasmid,run-1 valid barcode,1,library-2,plasmid,run-1 failed chastity filter,0,library-2,plasmid,run-1 +read too short,0,library-2,plasmid,run-1 diff --git a/tests/illuminabarcodeparser_toy_example.ipynb b/tests/illuminabarcodeparser_toy_example.ipynb index f077c54..c51037d 100644 --- a/tests/illuminabarcodeparser_toy_example.ipynb +++ b/tests/illuminabarcodeparser_toy_example.ipynb @@ -15,7 +15,15 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:22.648068Z", + "iopub.status.busy": "2024-04-10T19:51:22.647666Z", + "iopub.status.idle": "2024-04-10T19:51:23.690513Z", + "shell.execute_reply": "2024-04-10T19:51:23.689696Z", + "shell.execute_reply.started": 
"2024-04-10T19:51:22.648033Z" + } + }, "outputs": [], "source": [ "import tempfile\n", @@ -35,7 +43,15 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.695972Z", + "iopub.status.busy": "2024-04-10T19:51:23.695581Z", + "iopub.status.idle": "2024-04-10T19:51:23.702713Z", + "shell.execute_reply": "2024-04-10T19:51:23.701795Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.695935Z" + } + }, "outputs": [], "source": [ "parser = IlluminaBarcodeParser(bclen=4, upstream=\"ACATGA\", downstream=\"GACT\")" @@ -54,7 +70,15 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.708338Z", + "iopub.status.busy": "2024-04-10T19:51:23.707670Z", + "iopub.status.idle": "2024-04-10T19:51:23.726018Z", + "shell.execute_reply": "2024-04-10T19:51:23.725068Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.708272Z" + } + }, "outputs": [], "source": [ "r1file = tempfile.NamedTemporaryFile(mode=\"w\")\n", @@ -163,23 +187,32 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.729694Z", + "iopub.status.busy": "2024-04-10T19:51:23.729427Z", + "iopub.status.idle": "2024-04-10T19:51:23.744681Z", + "shell.execute_reply": "2024-04-10T19:51:23.743646Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.729664Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " barcode count\n", - "0 CGTA 2\n", - "1 AGTA 1\n", + "0 AGTA 1\n", + "1 CGTA 1\n", "2 GCCG 1\n", " fate count\n", - "0 valid barcode 4\n", - "1 unparseable barcode 3\n", + "0 unparseable barcode 3\n", + "1 valid barcode 3\n", "2 R1 / R2 disagree 1\n", "3 low quality barcode 1\n", - "4 failed chastity filter 0\n", - "5 invalid barcode 0\n" + "4 read too short 1\n", + "5 failed chastity filter 0\n", + "6 invalid barcode 0\n" 
] } ], @@ -200,22 +233,31 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.748395Z", + "iopub.status.busy": "2024-04-10T19:51:23.748172Z", + "iopub.status.idle": "2024-04-10T19:51:23.759268Z", + "shell.execute_reply": "2024-04-10T19:51:23.758404Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.748370Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " barcode count\n", - "0 CGTA 2\n", - "1 AAGT 1\n", + "0 AAGT 1\n", + "1 CGTA 1\n", "2 GCCG 1\n", " fate count\n", - "0 valid barcode 4\n", - "1 unparseable barcode 3\n", + "0 unparseable barcode 3\n", + "1 valid barcode 3\n", "2 low quality barcode 2\n", - "3 failed chastity filter 0\n", - "4 invalid barcode 0\n" + "3 read too short 1\n", + "4 failed chastity filter 0\n", + "5 invalid barcode 0\n" ] } ], @@ -235,22 +277,31 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.763562Z", + "iopub.status.busy": "2024-04-10T19:51:23.763160Z", + "iopub.status.idle": "2024-04-10T19:51:23.777870Z", + "shell.execute_reply": "2024-04-10T19:51:23.776948Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.763528Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " barcode count library sample\n", - "0 CGTA 2 lib-1 s1\n", - "1 AAGT 1 lib-1 s1\n", + "0 AAGT 1 lib-1 s1\n", + "1 CGTA 1 lib-1 s1\n", "2 GCCG 1 lib-1 s1\n", " fate count library sample\n", - "0 valid barcode 4 lib-1 s1\n", - "1 unparseable barcode 3 lib-1 s1\n", + "0 unparseable barcode 3 lib-1 s1\n", + "1 valid barcode 3 lib-1 s1\n", "2 low quality barcode 2 lib-1 s1\n", - "3 failed chastity filter 0 lib-1 s1\n", - "4 invalid barcode 0 lib-1 s1\n" + "3 read too short 1 lib-1 s1\n", + "4 failed chastity filter 0 lib-1 s1\n", + "5 invalid barcode 0 lib-1 s1\n" ] } ], @@ -272,24 +323,33 @@ { "cell_type": "code", 
"execution_count": 7, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.779178Z", + "iopub.status.busy": "2024-04-10T19:51:23.778863Z", + "iopub.status.idle": "2024-04-10T19:51:23.792937Z", + "shell.execute_reply": "2024-04-10T19:51:23.791547Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.779151Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " barcode count\n", - "0 CGTA 2\n", - "1 AGTA 1\n", + "0 AGTA 1\n", + "1 CGTA 1\n", "2 GCCG 1\n", "3 GGAG 1\n", " fate count\n", - "0 valid barcode 5\n", + "0 valid barcode 4\n", "1 unparseable barcode 2\n", "2 R1 / R2 disagree 1\n", "3 low quality barcode 1\n", - "4 failed chastity filter 0\n", - "5 invalid barcode 0\n" + "4 read too short 1\n", + "5 failed chastity filter 0\n", + "6 invalid barcode 0\n" ] } ], @@ -318,23 +378,32 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.795621Z", + "iopub.status.busy": "2024-04-10T19:51:23.794665Z", + "iopub.status.idle": "2024-04-10T19:51:23.811820Z", + "shell.execute_reply": "2024-04-10T19:51:23.811230Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.795555Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " barcode count\n", - "0 CGTA 2\n", - "1 AGTA 1\n", + "0 AGTA 1\n", + "1 CGTA 1\n", "2 TAAT 0\n", " fate count\n", "0 unparseable barcode 3\n", - "1 valid barcode 3\n", + "1 valid barcode 2\n", "2 R1 / R2 disagree 1\n", "3 invalid barcode 1\n", "4 low quality barcode 1\n", - "5 failed chastity filter 0\n" + "5 read too short 1\n", + "6 failed chastity filter 0\n" ] } ], @@ -357,7 +426,15 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T19:51:23.813291Z", + "iopub.status.busy": "2024-04-10T19:51:23.812924Z", + "iopub.status.idle": "2024-04-10T19:51:23.816757Z", + 
"shell.execute_reply": "2024-04-10T19:51:23.815995Z", + "shell.execute_reply.started": "2024-04-10T19:51:23.813264Z" + } + }, "outputs": [], "source": [ "r1file.close()\n", @@ -367,7 +444,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -381,7 +458,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.11.7" }, "toc": { "base_numbering": 1, @@ -398,5 +475,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tests/test_count_codonvariants.py b/tests/test_count_codonvariants.py index 7d3defc..ba165e2 100644 --- a/tests/test_count_codonvariants.py +++ b/tests/test_count_codonvariants.py @@ -155,6 +155,7 @@ def test_count_codonvariants(self): ) fatesfile = os.path.join(indir, "fates.csv") + print(fates) assert_frame_equal(fates, pd.read_csv(fatesfile)) libs_to_analyze = ["library-1"] From d9a3e4e4ad7494544b28c86b046017dfe6634923 Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 13:22:49 -0700 Subject: [PATCH 2/7] fix deprecation warning --- dms_variants/codonvarianttable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dms_variants/codonvarianttable.py b/dms_variants/codonvarianttable.py index 1db8132..500cb21 100644 --- a/dms_variants/codonvarianttable.py +++ b/dms_variants/codonvarianttable.py @@ -1999,7 +1999,7 @@ def plotMutFreqs( assert "target" not in set(df.columns).union(set(n_variants.columns)) df = ( - df.groupby(["library", "sample", "mutation_type", "site"]) + df.groupby(["library", "sample", "mutation_type", "site"], observed=False) .aggregate({"count": "sum"}) .reset_index() .merge(n_variants, on=["library", "sample"]) From cb75b899fc854fcc42cfb6d33047c768b3e2ad5b Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 14:49:33 -0700 Subject: [PATCH 3/7] add test of barcodes w `upstream2` --- 
...arcodeparser_toy_example_w_upstream2.ipynb | 346 ++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb diff --git a/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb new file mode 100644 index 0000000..ed98f41 --- /dev/null +++ b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Toy example with `IlluminaBarcodeParser` containing an `upstream2` sequence\n", + "This example illustrates use of a [IlluminaBarcodeParser](https://jbloomlab.github.io/dms_variants/dms_variants.illuminabarcodeparser.html#dms_variants.illuminabarcodeparser.IlluminaBarcodeParser) on a toy example.\n", + "\n", + "It is written primarily as a test for that class.\n", + "\n", + "Import required modules:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:06.599078Z", + "iopub.status.busy": "2024-04-10T21:48:06.598706Z", + "iopub.status.idle": "2024-04-10T21:48:07.574491Z", + "shell.execute_reply": "2024-04-10T21:48:07.573386Z", + "shell.execute_reply.started": "2024-04-10T21:48:06.599045Z" + } + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "from dms_variants.illuminabarcodeparser import IlluminaBarcodeParser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initialize an `IlluminaBarcodeParser` for a barcode arrangement that looks like this:\n", + "\n", + " 5'-[R2 binding site]-GCA-ACATGA-NNNN-[R1 binding site]-3'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.578862Z", + "iopub.status.busy": "2024-04-10T21:48:07.578511Z", + "iopub.status.idle": "2024-04-10T21:48:07.584685Z", + "shell.execute_reply": 
"2024-04-10T21:48:07.583848Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.578834Z" + } + }, + "outputs": [], + "source": [ + "parser = IlluminaBarcodeParser(bclen=4, upstream=\"ACATGA\", upstream2=\"GCA\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create temporary file holding the FASTQ reads.\n", + "We write some valid test reads and some invalid reads. \n", + "The header for each read explains why it is valid / invalid. \n", + "We use quality scores of ``?`` (30) or ``+`` (10) for high- and low-quality bases:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.588864Z", + "iopub.status.busy": "2024-04-10T21:48:07.588522Z", + "iopub.status.idle": "2024-04-10T21:48:07.597119Z", + "shell.execute_reply": "2024-04-10T21:48:07.596272Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.588833Z" + } + }, + "outputs": [], + "source": [ + "r1file = tempfile.NamedTemporaryFile(mode=\"w\")\n", + "\n", + "# valid TACG barcode, full flanking regions\n", + "_ = r1file.write(\n", + " \"@valid_CGTA_barcode\\n\"\n", + " \"CGTATCATGTTGC\\n\"\n", + " \"+\\n\"\n", + " \"?????????????\\n\"\n", + ")\n", + "\n", + "# valid CGTA barcode, partial flanking regions\n", + "_ = r1file.write(\n", + " \"@valid_CGTA_barcode_partial_flanking_region\\n\"\n", + " \"CGTATCATTGC\\n\"\n", + " \"+\\n\"\n", + " \"???????????\\n\"\n", + ")\n", + "\n", + "# valid GCCG barcode, extended flanking regions\n", + "_ = r1file.write(\n", + " \"@valid_GCCG_barcode_extended_flanking_region\\n\"\n", + " \"GCCGTCATGTTGCCAA\\n\"\n", + " \"+\\n\"\n", + " \"????????????????\\n\"\n", + ")\n", + "\n", + "# some sites low quality\n", + "_ = r1file.write(\n", + " \"@low_quality_site\\n\" \"CGTATCATGTTGC\\n\" \"+\\n\" \"???+?????????\\n\"\n", + ")\n", + "\n", + "# N in barcode\n", + "_ = r1file.write(\"@N_in_barcode\\n\" \"CGTNTCATGTTGC\\n\" \"+\\n\" \"?????????????\\n\")\n", + "\n", + "# 
GGAG barcode, one mismatch in flanking region\n", + "_ = r1file.write(\n", + " \"@GGAG_barcode_one_mismatch_in_upstream\\n\" \"GGAGTCATGATGC\\n\" \"+\\n\" \"?????????????\\n\"\n", + ")\n", + "\n", + "# GGTG barcode, mismatch in both upstream regions\n", + "_ = r1file.write(\n", + " \"@GGTG_barcode_two_mismatch_in_upstream_and_upstream2\\n\"\n", + " \"GGTGTCATGATGG\\n\"\n", + " \"+\\n\"\n", + " \"?????????????\\n\"\n", + ")\n", + "\n", + "r1file.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parse the barcodes using both R1 and R2 reads:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.601083Z", + "iopub.status.busy": "2024-04-10T21:48:07.600756Z", + "iopub.status.idle": "2024-04-10T21:48:07.621543Z", + "shell.execute_reply": "2024-04-10T21:48:07.620283Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.601055Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " barcode count\n", + "0 CGTA 1\n", + "1 GCCG 1\n", + " fate count\n", + "0 unparseable barcode 3\n", + "1 valid barcode 2\n", + "2 low quality barcode 1\n", + "3 read too short 1\n", + "4 failed chastity filter 0\n", + "5 invalid barcode 0\n" + ] + } + ], + "source": [ + "barcodes, fates = parser.parse(r1file.name)\n", + "print(barcodes)\n", + "print(fates)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now create a parser that allows mismatch in `upstream`, and check that we recover barcode:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.626568Z", + "iopub.status.busy": "2024-04-10T21:48:07.626231Z", + "iopub.status.idle": "2024-04-10T21:48:07.642139Z", + "shell.execute_reply": "2024-04-10T21:48:07.641184Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.626531Z" + }, + "scrolled": true + }, + "outputs": [ + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + " barcode count\n", + "0 CGTA 1\n", + "1 GCCG 1\n", + "2 GGAG 1\n", + " fate count\n", + "0 valid barcode 3\n", + "1 unparseable barcode 2\n", + "2 low quality barcode 1\n", + "3 read too short 1\n", + "4 failed chastity filter 0\n", + "5 invalid barcode 0\n" + ] + } + ], + "source": [ + "parser_mismatch = IlluminaBarcodeParser(\n", + " bclen=4,\n", + " upstream=\"ACATGA\",\n", + " upstream2=\"GCA\",\n", + " upstream_mismatch=1,\n", + ")\n", + "barcodes_mismatch, fates_mismatch = parser_mismatch.parse(r1file.name)\n", + "print(barcodes_mismatch)\n", + "print(fates_mismatch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now create a parser that allows mismatch in `upstream` and `upstream2`, and check that we recover barcode:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.643592Z", + "iopub.status.busy": "2024-04-10T21:48:07.643257Z", + "iopub.status.idle": "2024-04-10T21:48:07.656472Z", + "shell.execute_reply": "2024-04-10T21:48:07.655830Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.643560Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " barcode count\n", + "0 CGTA 1\n", + "1 GCCG 1\n", + "2 GGAG 1\n", + "3 GGTG 1\n", + " fate count\n", + "0 valid barcode 4\n", + "1 low quality barcode 1\n", + "2 read too short 1\n", + "3 unparseable barcode 1\n", + "4 failed chastity filter 0\n", + "5 invalid barcode 0\n" + ] + } + ], + "source": [ + "parser_mismatch = IlluminaBarcodeParser(\n", + " bclen=4,\n", + " upstream=\"ACATGA\",\n", + " upstream2=\"GCA\",\n", + " upstream_mismatch=1,\n", + " upstream2_mismatch=1,\n", + ")\n", + "barcodes_mismatch, fates_mismatch = parser_mismatch.parse(r1file.name)\n", + "print(barcodes_mismatch)\n", + "print(fates_mismatch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Close 
the temporary file:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T21:48:07.657977Z", + "iopub.status.busy": "2024-04-10T21:48:07.657654Z", + "iopub.status.idle": "2024-04-10T21:48:07.661132Z", + "shell.execute_reply": "2024-04-10T21:48:07.660520Z", + "shell.execute_reply.started": "2024-04-10T21:48:07.657944Z" + } + }, + "outputs": [], + "source": [ + "r1file.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From eccd4d650d0f10e9560539ba1370787d692c0ec6 Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 15:18:45 -0700 Subject: [PATCH 4/7] add `outer_flank_fates` to classify outer flank failures differently in barcode parsing --- CHANGELOG.rst | 2 +- dms_variants/illuminabarcodeparser.py | 52 ++++++- ...arcodeparser_toy_example_w_upstream2.ipynb | 137 ++++++++++++------ 3 files changed, 141 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 13f6241..01359ae 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,7 +8,7 @@ The format is based on `Keep a Changelog `_. 
1.6.0 ----- -- Added ability to parse second upstream / downstream region in ``IlluminaBarcodeParser`` by adding ``upstream2`` and ``downstream`` parameters. Also modified ``IlluminaBarcodeParser`` so that reads will only be parsed if they are long enough to fully cover the region containing the barcodes and specified upstream / downstream sequences. Based on docs, this is how it was supposed to function before but did not. Additionally, this adds another row ("reads too short") to the fates from the barcode parser. +- Added ability to parse second upstream / downstream region in ``IlluminaBarcodeParser`` by adding ``upstream2`` and ``downstream2`` parameters. Also modified ``IlluminaBarcodeParser`` so that reads will only be parsed if they are long enough to fully cover the region containing the barcodes and specified upstream / downstream sequences. Based on docs, this is how it was supposed to function before but did not. Additionally, this adds another row ("reads too short") to the fates from the barcode parser, as well as the ``outer_flank_fates`` option to report just failing the additional upstream and downstream regions. 1.5.0 ----- diff --git a/dms_variants/illuminabarcodeparser.py b/dms_variants/illuminabarcodeparser.py index b6e194f..26e7da8 100644 --- a/dms_variants/illuminabarcodeparser.py +++ b/dms_variants/illuminabarcodeparser.py @@ -38,7 +38,11 @@ class IlluminaBarcodeParser: must fully cover region between R1 start and barcode, and if using R2 then `upstream` must fully cover region between R2 start and barcode. However, it is fine if R1 reads backwards past `upstream`, and if `R2` - reads forward past `downstream`. + reads forward past `downstream`. The `upstream2` and `downstream2` + can be used to require additional flanking sequences. 
Normally these + would just be rolled into `upstream` and `downstream`, but you might + specify separately if you are actually using these to parse additional + indices that you might want to set different mismatch criteria for. Parameters ---------- @@ -179,7 +183,7 @@ def __init__( "R2": regex.compile( f"({self.upstream2})" + f"{{s<={self.upstream2_mismatch}}}" - + f"^({self.upstream})" + + f"({self.upstream})" + f"{{s<={self.upstream_mismatch}}}" + f"(?P[ACTG]{{{self.bclen}}})" + f"({self.downstream})" @@ -189,7 +193,30 @@ def __init__( ), } - def parse(self, r1files, *, r2files=None, add_cols=None): + # build matchers that do not have upstream2 or downstream2 if needed + self._has_flank2 = (len(self.upstream2) > 0) or (len(self.downstream2) > 0) + self._matchers_no_flank2 = { + "R1": regex.compile( + f"[{self.VALID_NTS}]{{{len(self.downstream2)}}}" + + f"({self._rcdownstream})" + + f"{{s<={self.downstream_mismatch}}}" + + f"(?P[ACTG]{{{self.bclen}}})" + + f"({self._rcupstream})" + + f"{{s<={self.upstream_mismatch}}}" + + f"[{self.VALID_NTS}]{{{len(self.upstream2)}}}" + ), + "R2": regex.compile( + f"[{self.VALID_NTS}]{{{len(self.upstream2)}}}" + + f"^({self.upstream})" + + f"{{s<={self.upstream_mismatch}}}" + + f"(?P[ACTG]{{{self.bclen}}})" + + f"({self.downstream})" + + f"{{s<={self.downstream_mismatch}}}" + + f"[{self.VALID_NTS}]{{{len(self.downstream2)}}}" + ), + } + + def parse(self, r1files, *, r2files=None, add_cols=None, outer_flank_fates=False): """Parse barcodes from files. Parameters @@ -201,6 +228,11 @@ def parse(self, r1files, *, r2files=None, add_cols=None): add_cols : None or dict If dict, specify names and values (i.e., sample or library names) to be aded to returned data frames. + outer_flank_fates : bool + If `True`, if using outer flanking regions then in the output fates + specify reads that fail just the outer flanking regions (`upstream2` or + `downstream2`). Otherwise, such failures will be grouped with the + "unparseable barcode" fate. 
Returns ------- @@ -216,6 +248,9 @@ def parse(self, r1files, *, r2files=None, add_cols=None): - "R1 / R2 disagree" (if using `r2files`) - "low quality barcode": sequencing quality low - "unparseable barcode": invalid flank sequence, N in barcode + - "read too short": read is too short to cover specified region + - "invalid outer flank" : if using `outer_flank_fates` and + `upstream2` or `downstream2` fails. Note that these data frames also include any columns specified by `add_cols`. @@ -253,6 +288,8 @@ def parse(self, r1files, *, r2files=None, add_cols=None): } if not r1only: fates["R1 / R2 disagree"] = 0 + if outer_flank_fates and self._has_flank2: + fates["invalid outer flank"] = 0 # min length of interest for reads minlen = ( @@ -341,6 +378,15 @@ def parse(self, r1files, *, r2files=None, add_cols=None): fates["low quality barcode"] += 1 else: fates["R1 / R2 disagree"] += 1 + elif ( + outer_flank_fates + and self._has_flank2 + and all( + self._matchers_no_flank2[read].fullmatch(r) is not None + for (read, r) in zip(reads, readlist) + ) + ): + fates["invalid outer flank"] += 1 else: # invalid flanking sequence or N in barcode fates["unparseable barcode"] += 1 diff --git a/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb index ed98f41..75d7ed2 100644 --- a/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb +++ b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb @@ -17,11 +17,11 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2024-04-10T21:48:06.599078Z", - "iopub.status.busy": "2024-04-10T21:48:06.598706Z", - "iopub.status.idle": "2024-04-10T21:48:07.574491Z", - "shell.execute_reply": "2024-04-10T21:48:07.573386Z", - "shell.execute_reply.started": "2024-04-10T21:48:06.599045Z" + "iopub.execute_input": "2024-04-10T22:17:38.979735Z", + "iopub.status.busy": "2024-04-10T22:17:38.979371Z", + "iopub.status.idle": "2024-04-10T22:17:39.999532Z", + 
"shell.execute_reply": "2024-04-10T22:17:39.998756Z", + "shell.execute_reply.started": "2024-04-10T22:17:38.979703Z" } }, "outputs": [], @@ -45,11 +45,11 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-04-10T21:48:07.578862Z", - "iopub.status.busy": "2024-04-10T21:48:07.578511Z", - "iopub.status.idle": "2024-04-10T21:48:07.584685Z", - "shell.execute_reply": "2024-04-10T21:48:07.583848Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.578834Z" + "iopub.execute_input": "2024-04-10T22:17:40.003968Z", + "iopub.status.busy": "2024-04-10T22:17:40.003644Z", + "iopub.status.idle": "2024-04-10T22:17:40.011300Z", + "shell.execute_reply": "2024-04-10T22:17:40.010467Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.003937Z" } }, "outputs": [], @@ -72,11 +72,11 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-04-10T21:48:07.588864Z", - "iopub.status.busy": "2024-04-10T21:48:07.588522Z", - "iopub.status.idle": "2024-04-10T21:48:07.597119Z", - "shell.execute_reply": "2024-04-10T21:48:07.596272Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.588833Z" + "iopub.execute_input": "2024-04-10T22:17:40.015148Z", + "iopub.status.busy": "2024-04-10T22:17:40.014722Z", + "iopub.status.idle": "2024-04-10T22:17:40.022961Z", + "shell.execute_reply": "2024-04-10T22:17:40.022166Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.015118Z" } }, "outputs": [], @@ -84,12 +84,7 @@ "r1file = tempfile.NamedTemporaryFile(mode=\"w\")\n", "\n", "# valid TACG barcode, full flanking regions\n", - "_ = r1file.write(\n", - " \"@valid_CGTA_barcode\\n\"\n", - " \"CGTATCATGTTGC\\n\"\n", - " \"+\\n\"\n", - " \"?????????????\\n\"\n", - ")\n", + "_ = r1file.write(\"@valid_CGTA_barcode\\n\" \"CGTATCATGTTGC\\n\" \"+\\n\" \"?????????????\\n\")\n", "\n", "# valid CGTA barcode, partial flanking regions\n", "_ = r1file.write(\n", @@ -108,9 +103,7 @@ ")\n", "\n", "# some sites low quality\n", - "_ = r1file.write(\n", - " 
\"@low_quality_site\\n\" \"CGTATCATGTTGC\\n\" \"+\\n\" \"???+?????????\\n\"\n", - ")\n", + "_ = r1file.write(\"@low_quality_site\\n\" \"CGTATCATGTTGC\\n\" \"+\\n\" \"???+?????????\\n\")\n", "\n", "# N in barcode\n", "_ = r1file.write(\"@N_in_barcode\\n\" \"CGTNTCATGTTGC\\n\" \"+\\n\" \"?????????????\\n\")\n", @@ -143,11 +136,11 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-04-10T21:48:07.601083Z", - "iopub.status.busy": "2024-04-10T21:48:07.600756Z", - "iopub.status.idle": "2024-04-10T21:48:07.621543Z", - "shell.execute_reply": "2024-04-10T21:48:07.620283Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.601055Z" + "iopub.execute_input": "2024-04-10T22:17:40.026647Z", + "iopub.status.busy": "2024-04-10T22:17:40.026403Z", + "iopub.status.idle": "2024-04-10T22:17:40.039583Z", + "shell.execute_reply": "2024-04-10T22:17:40.038894Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.026620Z" } }, "outputs": [ @@ -186,11 +179,11 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-04-10T21:48:07.626568Z", - "iopub.status.busy": "2024-04-10T21:48:07.626231Z", - "iopub.status.idle": "2024-04-10T21:48:07.642139Z", - "shell.execute_reply": "2024-04-10T21:48:07.641184Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.626531Z" + "iopub.execute_input": "2024-04-10T22:17:40.042962Z", + "iopub.status.busy": "2024-04-10T22:17:40.042642Z", + "iopub.status.idle": "2024-04-10T22:17:40.056823Z", + "shell.execute_reply": "2024-04-10T22:17:40.056209Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.042936Z" }, "scrolled": true }, @@ -229,7 +222,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now create a parser that allows mismatch in `upstream` and `upstream2`, and check that we recover barcode:" + "Now classify outer flank failures differently from unparseable barcodes:" ] }, { @@ -237,11 +230,63 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": 
"2024-04-10T21:48:07.643592Z", - "iopub.status.busy": "2024-04-10T21:48:07.643257Z", - "iopub.status.idle": "2024-04-10T21:48:07.656472Z", - "shell.execute_reply": "2024-04-10T21:48:07.655830Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.643560Z" + "iopub.execute_input": "2024-04-10T22:17:40.058324Z", + "iopub.status.busy": "2024-04-10T22:17:40.057954Z", + "iopub.status.idle": "2024-04-10T22:17:40.069187Z", + "shell.execute_reply": "2024-04-10T22:17:40.068353Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.058295Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " barcode count\n", + "0 CGTA 1\n", + "1 GCCG 1\n", + "2 GGAG 1\n", + " fate count\n", + "0 valid barcode 3\n", + "1 invalid outer flank 1\n", + "2 low quality barcode 1\n", + "3 read too short 1\n", + "4 unparseable barcode 1\n", + "5 failed chastity filter 0\n", + "6 invalid barcode 0\n" + ] + } + ], + "source": [ + "parser_mismatch = IlluminaBarcodeParser(\n", + " bclen=4,\n", + " upstream=\"ACATGA\",\n", + " upstream2=\"GCA\",\n", + " upstream_mismatch=1,\n", + ")\n", + "barcodes_mismatch, fates_mismatch = parser_mismatch.parse(r1file.name, outer_flank_fates=True)\n", + "print(barcodes_mismatch)\n", + "print(fates_mismatch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now create a parser that allows mismatch in `upstream` and `upstream2`, and check that we recover barcode:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2024-04-10T22:17:40.070486Z", + "iopub.status.busy": "2024-04-10T22:17:40.070109Z", + "iopub.status.idle": "2024-04-10T22:17:40.082614Z", + "shell.execute_reply": "2024-04-10T22:17:40.081961Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.070458Z" } }, "outputs": [ @@ -286,14 +331,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "execution": { - 
"iopub.execute_input": "2024-04-10T21:48:07.657977Z", - "iopub.status.busy": "2024-04-10T21:48:07.657654Z", - "iopub.status.idle": "2024-04-10T21:48:07.661132Z", - "shell.execute_reply": "2024-04-10T21:48:07.660520Z", - "shell.execute_reply.started": "2024-04-10T21:48:07.657944Z" + "iopub.execute_input": "2024-04-10T22:17:40.083879Z", + "iopub.status.busy": "2024-04-10T22:17:40.083467Z", + "iopub.status.idle": "2024-04-10T22:17:40.087525Z", + "shell.execute_reply": "2024-04-10T22:17:40.086863Z", + "shell.execute_reply.started": "2024-04-10T22:17:40.083850Z" } }, "outputs": [], From eef73456ec01853303a9020f91992d5b5e3787e9 Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 15:21:23 -0700 Subject: [PATCH 5/7] format code --- tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb index 75d7ed2..6af345b 100644 --- a/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb +++ b/tests/illuminabarcodeparser_toy_example_w_upstream2.ipynb @@ -265,7 +265,9 @@ " upstream2=\"GCA\",\n", " upstream_mismatch=1,\n", ")\n", - "barcodes_mismatch, fates_mismatch = parser_mismatch.parse(r1file.name, outer_flank_fates=True)\n", + "barcodes_mismatch, fates_mismatch = parser_mismatch.parse(\n", + " r1file.name, outer_flank_fates=True\n", + ")\n", "print(barcodes_mismatch)\n", "print(fates_mismatch)" ] From b008de9fd3a681789c599056de166521822d4ee8 Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 15:36:34 -0700 Subject: [PATCH 6/7] remove obsolete `guide=False` from `plotnine` plots --- CHANGELOG.rst | 2 ++ notebooks/codonvariant_sim_data.ipynb | 2 +- notebooks/codonvariant_sim_data_multi_targets.ipynb | 2 +- notebooks/multi_latent_phenos.ipynb | 2 +- notebooks/narrow_bottleneck.ipynb | 2 +- notebooks/parsebarcodes_sim_data.ipynb | 2 +- 
tests/monotonicsplineepistasisbottlenecklikelihood_model.ipynb | 2 +- tests/monotonicsplineepistasiscauchylikelihood_model.ipynb | 2 +- tests/monotonicsplineepistasisgaussianlikelihood_model.ipynb | 2 +- tests/noepistasisbottlenecklikelihood_model.ipynb | 2 +- tests/noepistasisgaussianlikelihood_model.ipynb | 2 +- 11 files changed, 12 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 01359ae..617c392 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,8 @@ The format is based on `Keep a Changelog `_. ----- - Added ability to parse second upstream / downstream region in ``IlluminaBarcodeParser`` by adding ``upstream2`` and ``downstream2`` parameters. Also modified ``IlluminaBarcodeParser`` so that reads will only be parsed if they are long enough to fully cover the region containing the barcodes and specified upstream / downstream sequences. Based on docs, this is how it was supposed to function before but did not. Additionally, this adds another row ("reads too short") to the fates from the barcode parser, as well as the ``outer_flank_fates`` option to report just failing the additional upstream and downstream regions. +- Remove obsolete ``guide=False`` from some ``plotnine`` plots in examples / tests (this was removed in ``plotnine`` version 0.13). + 1.5.0 ----- - Remove use of deprecated ``scipy`` functions like ``flip`` to use ``numpy`` alternatives instead (fixes [this issue](https://github.com/jbloomlab/dms_variants/issues/86)). 
diff --git a/notebooks/codonvariant_sim_data.ipynb b/notebooks/codonvariant_sim_data.ipynb index a2ac6af..51aef91 100644 --- a/notebooks/codonvariant_sim_data.ipynb +++ b/notebooks/codonvariant_sim_data.ipynb @@ -3584,7 +3584,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw(show=True)" ] diff --git a/notebooks/codonvariant_sim_data_multi_targets.ipynb b/notebooks/codonvariant_sim_data_multi_targets.ipynb index 8b95145..226b3cc 100644 --- a/notebooks/codonvariant_sim_data_multi_targets.ipynb +++ b/notebooks/codonvariant_sim_data_multi_targets.ipynb @@ -5430,7 +5430,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "\n", "_ = p.draw(show=True)" diff --git a/notebooks/multi_latent_phenos.ipynb b/notebooks/multi_latent_phenos.ipynb index e1e5b0e..bde24eb 100644 --- a/notebooks/multi_latent_phenos.ipynb +++ b/notebooks/multi_latent_phenos.ipynb @@ -768,7 +768,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw(show=True)" ] diff --git a/notebooks/narrow_bottleneck.ipynb b/notebooks/narrow_bottleneck.ipynb index ab08e57..b56f35c 100644 --- a/notebooks/narrow_bottleneck.ipynb +++ b/notebooks/narrow_bottleneck.ipynb @@ -835,7 +835,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = 
p.draw(show=True)" ] diff --git a/notebooks/parsebarcodes_sim_data.ipynb b/notebooks/parsebarcodes_sim_data.ipynb index 8367500..9f67cd7 100644 --- a/notebooks/parsebarcodes_sim_data.ipynb +++ b/notebooks/parsebarcodes_sim_data.ipynb @@ -1205,7 +1205,7 @@ " + geom_bar(stat=\"identity\")\n", " + facet_grid(\"sample ~ library\")\n", " + facet_grid(\"sample ~ library\")\n", - " + scale_fill_manual(CBPALETTE, guide=False)\n", + " + scale_fill_manual(CBPALETTE)\n", " + theme(\n", " figure_size=(\n", " 1.4 * (1 + len(fates[\"library\"].unique())),\n", diff --git a/tests/monotonicsplineepistasisbottlenecklikelihood_model.ipynb b/tests/monotonicsplineepistasisbottlenecklikelihood_model.ipynb index eb6b40d..1150b2c 100644 --- a/tests/monotonicsplineepistasisbottlenecklikelihood_model.ipynb +++ b/tests/monotonicsplineepistasisbottlenecklikelihood_model.ipynb @@ -490,7 +490,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw()" ] diff --git a/tests/monotonicsplineepistasiscauchylikelihood_model.ipynb b/tests/monotonicsplineepistasiscauchylikelihood_model.ipynb index 5b962c2..53b625d 100644 --- a/tests/monotonicsplineepistasiscauchylikelihood_model.ipynb +++ b/tests/monotonicsplineepistasiscauchylikelihood_model.ipynb @@ -745,7 +745,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw()" ] diff --git a/tests/monotonicsplineepistasisgaussianlikelihood_model.ipynb b/tests/monotonicsplineepistasisgaussianlikelihood_model.ipynb index e9206c9..ddf217b 100644 --- a/tests/monotonicsplineepistasisgaussianlikelihood_model.ipynb +++ 
b/tests/monotonicsplineepistasisgaussianlikelihood_model.ipynb @@ -605,7 +605,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw()" ] diff --git a/tests/noepistasisbottlenecklikelihood_model.ipynb b/tests/noepistasisbottlenecklikelihood_model.ipynb index 01ab9ee..35c0108 100644 --- a/tests/noepistasisbottlenecklikelihood_model.ipynb +++ b/tests/noepistasisbottlenecklikelihood_model.ipynb @@ -403,7 +403,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw()" ] diff --git a/tests/noepistasisgaussianlikelihood_model.ipynb b/tests/noepistasisgaussianlikelihood_model.ipynb index 402e350..02307d3 100644 --- a/tests/noepistasisgaussianlikelihood_model.ipynb +++ b/tests/noepistasisgaussianlikelihood_model.ipynb @@ -705,7 +705,7 @@ " axis_text_x=element_text(angle=90),\n", " panel_grid_major_x=element_blank(), # no vertical grid lines\n", " )\n", - " + scale_fill_manual(values=CBPALETTE[1:], guide=False)\n", + " + scale_fill_manual(values=CBPALETTE[1:])\n", ")\n", "_ = p.draw()" ] From 2f681afb17c1d029ae8c2e536591e4529663b869 Mon Sep 17 00:00:00 2001 From: jbloom Date: Wed, 10 Apr 2024 15:51:03 -0700 Subject: [PATCH 7/7] remove apparently obsolete `gnuplot` color map --- CHANGELOG.rst | 2 +- dms_variants/codonvarianttable.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 617c392..437617b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,7 +9,7 @@ The format is based on `Keep a Changelog `_. 
1.6.0 ----- - Added ability to parse second upstream / downstream region in ``IlluminaBarcodeParser`` by adding ``upstream2`` and ``downstream2`` parameters. Also modified ``IlluminaBarcodeParser`` so that reads will only be parsed if they are long enough to fully cover the region containing the barcodes and specified upstream / downstream sequences. Based on docs, this is how it was supposed to function before but did not. Additionally, this adds another row ("reads too short") to the fates from the barcode parser, as well as the ``outer_flank_fates`` option to report just failing the additional upstream and downstream regions. - +- Change default color of heatmaps made by ``CodonVariantTable`` due to current one being obsolete. - Remove obsolete ``guide=False`` from some ``plotnine`` plots in examples / tests (this was removed in ``plotnine`` version 0.13). 1.5.0 diff --git a/dms_variants/codonvarianttable.py b/dms_variants/codonvarianttable.py index 500cb21..7665566 100644 --- a/dms_variants/codonvarianttable.py +++ b/dms_variants/codonvarianttable.py @@ -1934,7 +1934,6 @@ def plotMutHeatmap( expand=(0, 0), ) + p9.ylab(mut_desc) - + p9.scale_fill_cmap("gnuplot") ) if samples is None: