Skip to content

Commit

Permalink
Fix #96 - properly handle @CO tags in SAM files
Browse files Browse the repository at this point in the history
  • Loading branch information
jdidion committed May 4, 2020
1 parent 82a726c commit c36e413
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 18 deletions.
21 changes: 20 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
# Changes

## 2.0.0-alpha.3 (dev)
## 2.0.0-alpha.5 (2020.05.04)

* Fix #97 (port from 1.1 branch)
* Fix #96 - properly handle @CO tags in SAM files

## 2.0.0-alpha.4 (2020.01.10)

* bugfixes

## 2.0.0-alpha.3 (2020.01.10)

* bugfixes

## 2.0.0-alpha.2 (2020.01.02)

Expand Down Expand Up @@ -37,6 +48,14 @@
* Enabled output to stdout by default with single-end and interleaved reads.
* Migrated to setuptools_scm for version management.

## v1.1.26 (2020.05.04)

* Fix #97 - handle single-end data in QC report

## v1.1.25 (2020.01.10)

* Fix #71 - handle corrupted adapter cache file

## v1.1.24 (2019.11.17)

* Fix #87 - Python 3.8 incompatibility - change time.clock() to time.process_time()
Expand Down
17 changes: 10 additions & 7 deletions atropos/io/formatters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABCMeta, abstractmethod
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Union, cast

from atropos.errors import UnknownFileTypeError
from atropos.io import SequenceFileType
Expand Down Expand Up @@ -256,17 +256,20 @@ def _get_header(self) -> Optional[str]:
if not self._header:
return DEFAULT_SAM_HEADER

def create_row(_tags: dict):
tags_str = "\t".join(f"{key}:{val}" for key, val in _tags.items())
def create_row(_tags: Union[str, dict]):
if isinstance(_tags, dict):
tags_str = "\t".join(f"{key}:{val}" for key, val in _tags.items())
else:
tags_str = cast(str, _tags)
return f"@{header_type}\t{tags_str}"

rows = []

for header_type, tags_list in self._header.items():
if isinstance(tags_list, dict):
rows.append(create_row(tags_list))
for header_type, header_value in self._header.items():
if isinstance(header_value, dict):
rows.append(create_row(header_value))
else:
rows.extend(create_row(tags) for tags in tags_list)
rows.extend(create_row(tags) for tags in header_value)

return "\n".join(rows) + "\n"

Expand Down
27 changes: 17 additions & 10 deletions atropos/io/readers/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
import sys
from typing import (
Callable, Iterator, IO, Optional, Sequence as SequenceType, Tuple, Type,
Callable, Dict, Iterator, IO, Optional, Sequence as SequenceType, Tuple, Type,
Union, cast
)

Expand Down Expand Up @@ -634,23 +634,30 @@ def __init__(self, sam_file: IO):
self._header = {}
self._header_size = 0

def fields_to_tags(fields: SequenceType[str]) -> Dict[str, str]:
    """Convert ``TAG:VALUE`` header fields into a ``{TAG: VALUE}`` dict.

    Args:
        fields: The tag fields of one header line, i.e. the line's
            tab-separated columns *without* the leading ``@XX`` record type.

    Returns:
        A dict mapping each tag to its (string) value.

    Raises:
        IndexError: If a field does not contain a ``:`` separator.
    """
    # Split on the first ":" only - the value itself may contain colons
    # (e.g. "UR:file:/path/to/ref.fa" or a command line in "CL:...").
    return dict(
        (pair[0], pair[1])
        for pair in (kv.split(":", 1) for kv in fields)
    )

def add_header(fields: SequenceType[str]):
    """Record one parsed header line in ``self._header``.

    ``@HD`` is stored as a single tag dict. Every other record type is
    stored as a list with one entry per line: ``@CO`` entries are the raw
    comment text, all others are tag dicts (with ``LN`` of ``@SQ`` lines
    converted to ``int``).

    Args:
        fields: The tab-separated columns of the header line; ``fields[0]``
            is the record type including the leading ``@``.
    """
    header_type = fields[0][1:]
    if header_type == "HD":
        self._header["HD"] = fields_to_tags(fields[1:])
    elif header_type == "CO":
        # the text after the first tab is free-form - even tab is allowed
        self._header.setdefault("CO", []).append("\t".join(fields[1:]))
    else:
        # Skip fields[0] (the "@XX" record type) - it is not a TAG:VALUE
        # pair and would make fields_to_tags fail.
        tags = fields_to_tags(fields[1:])

        if header_type == "SQ" and "LN" in tags:
            tags["LN"] = int(tags["LN"])

        self._header.setdefault(header_type, []).append(tags)

for line in self._reader:
if line[0].startswith("@"):
Expand Down

0 comments on commit c36e413

Please sign in to comment.