Skip to content

Commit

Permalink
Merge branch 'crispr' into dev_bjorn
Browse files Browse the repository at this point in the history
  • Loading branch information
BjornFJohansson committed Dec 7, 2023
2 parents 2336f1a + 915fd03 commit 75635b4
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 0 deletions.
128 changes: 128 additions & 0 deletions src/pydna/crispr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2013-2023 by Björn Johansson. All rights reserved.
# This code is part of the Python-dna distribution and governed by its
# license. Please see the LICENSE.txt file that should have been included
# as part of this package.
"""Provides the Dseq class for handling double stranded DNA sequences.
Dseq is a subclass of :class:`Bio.Seq.Seq`. The Dseq class
is mostly useful as a part of the :class:`pydna.dseqrecord.Dseqrecord` class
which can hold more meta data.
The Dseq class support the notion of circular and linear DNA topology.
"""

from abc import ABC, abstractmethod
import re
from pydna.utils import rc


class _cas(ABC):
    """Abstract base class for a guide-directed nuclease.

    An instance is built from a protospacer sequence and holds a compiled
    regular expression (``compsite``) that finds the protospacer followed by
    the PAM on the given strand, or the reverse complement of both on the
    opposite strand. Subclasses supply the class constants and implement
    ``search`` and ``__str__``.
    """

    # Placeholder class constants; "ND" presumably means "not defined".
    # Concrete subclasses are expected to override all of them.
    scaffold = "ND"  # guide RNA scaffold sequence
    pam = "ND"  # PAM, interpolated into the search regex as-is
    size = 0  # protospacer length
    fst5 = 0  # cut offset used by search() for watson-strand hits
    fst3 = 0  # cut offset used by search() for crick-strand hits

    def __init__(self, protospacer):
        self.protospacer = protospacer
        # Both alternatives are zero-width lookaheads wrapping a named group,
        # so overlapping sites are all reported by finditer().
        watson_site = f"(?=(?P<watson>{protospacer}{self.pam}))"
        crick_site = f"(?=(?P<crick>{rc(self.pam)}{rc(protospacer)}))"
        self.compsite = re.compile(f"{watson_site}|{crick_site}", re.UNICODE)

    @abstractmethod
    def search(self, dna, linear=True):
        """To override in subclass."""

    def __repr__(self):
        # Abbreviate the protospacer to its first and last three characters.
        head = self.protospacer[:3]
        tail = self.protospacer[-3:]
        return f"{type(self).__name__}({head}..{tail})"

    @abstractmethod
    def __str__(self):
        """To override in subclass."""


class cas9(_cas):
    """Cas9 nuclease directed by a 20 nt protospacer next to a ``.GG`` PAM.

    The class constants describe the site layout sketched below: ``size`` is
    the protospacer length, ``fst5`` and ``fst3`` are the cut offsets used by
    :meth:`search`, and ``ovhg`` is derived from them (it evaluates to 0).

        |-------size--------|
        -----protospacer-----
        |------fst5-----| |-| fst3
                             PAM
    5-NNGGAAGAGTAATACACTA-AAANGGNN-3
      ||||||||||||||||||| ||||||||
    3-NNCCTTCTCATTATGTGAT-TTTNCCNN-5
        ||||||||||||||||| |||
      5-GGAAGAGTAATACACTA-AAAg-u-a-a-g-g  Scaffold
        ---gRNA spacer---    u-a
                             u-a
                             u-a
                             u-a
                             a-u
                             g-u-g
                            a     a
                             g-c-a
                             c-g
                             u-a
                             a-u
                            g   a  tetraloop
                             a-a
    """

    scaffold = "GTTTTAGAGCTAGAAATAGCAAGTTAAAATAAGG"
    pam = ".GG"  # regex: any single character followed by GG
    size = 20
    fst5 = 17
    fst3 = -3
    ovhg = fst5 - (size + fst3)

    def search(self, dna, linear=True):
        """Return the cut positions of this guide in *dna*.

        Parameters
        ----------
        dna
            Sequence to search; it is converted with ``str()`` and
            upper-cased before matching.
        linear
            If True (default) only sites fully contained in the sequence are
            found. If False the sequence is treated as circular, so sites
            spanning the end/start junction are found as well.

        Returns
        -------
        list of int
            One position per matching site, in scan order. A watson-strand
            hit yields ``start + 1 + fst5``; a crick-strand hit yields
            ``start + len(pam) + 1 - fst3``, where ``start`` is the 0-based
            index of the match. For circular input, positions are wrapped
            into the range ``1..len(dna)``.
        """
        dna = str(dna).upper()
        length = len(dna)
        if not linear:
            # A complete site is protospacer + PAM long. A site starting on
            # the last base therefore needs (site length - 1) bases from the
            # start of the sequence, beginning with the very first base.
            dna += dna[: self.size + len(self.pam) - 1]
        results = []
        for mobj in self.compsite.finditer(dna):
            if not linear and mobj.start() >= length:
                # Matches starting inside the appended copy duplicate sites
                # already reported at the beginning of the sequence.
                break
            if mobj.group("watson"):
                results.append(mobj.start("watson") + 1 + self.fst5)
            if mobj.group("crick"):
                results.append(mobj.start("crick") + len(self.pam) + 1 - self.fst3)
        if not linear:
            # Map positions that ran past the end back onto the circle.
            results = [(position - 1) % length + 1 for position in results]
        return results

    def __str__(self):
        """Return a two-line, FASTA-like text with protospacer and scaffold."""
        return f">{type(self).__name__} protospacer scaffold\n{self.protospacer} {self.scaffold}"


def protospacer(guide_construct, cas=cas9):
    """Return the protospacers found next to the *cas* scaffold.

    The upper-cased sequence of ``guide_construct.seq`` is scanned for the
    scaffold of *cas* preceded by ``cas.size`` characters, and for the reverse
    complement of the scaffold followed by ``cas.size`` characters.

    Parameters
    ----------
    guide_construct
        Object with a ``seq`` attribute holding the sequence to scan.
    cas
        Class (or object) providing ``size`` and ``scaffold``; defaults to
        ``cas9``.

    Returns
    -------
    list of str
        Protospacers from direct scaffold hits first, followed by the
        reverse-complemented protospacers from reverse-complement hits.
    """
    sequence = str(guide_construct.seq).upper()
    watson_pattern = f"(?P<ps>.{{{cas.size}}})(?:{cas.scaffold})"
    crick_pattern = f"(?:{rc(cas.scaffold)})(?P<ps>.{{{cas.size}}})"

    found = []
    for hit in re.finditer(watson_pattern, sequence):
        found.append(hit.group("ps"))
    for hit in re.finditer(crick_pattern, sequence):
        # Hits on the opposite strand are reported in guide orientation.
        found.append(rc(hit.group("ps")))
    return found


if __name__ == "__main__":
    import doctest
    import os as _os

    # Run the module doctests with the "pydna_cached_funcs" environment
    # variable temporarily emptied (presumably to switch off pydna's function
    # caching), then put the previous value back.
    cached = _os.environ.get("pydna_cached_funcs", "")
    _os.environ["pydna_cached_funcs"] = ""
    doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
    _os.environ["pydna_cached_funcs"] = cached
37 changes: 37 additions & 0 deletions tests/test_module_crispr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pytest


def test_crispr():
    """Cut a target with guides taken from an sgRNA construct.

    All four combinations of target and construct orientation are checked
    against the same two expected fragments.
    """
    from pydna.crispr import cas9, protospacer
    from pydna.dseqrecord import Dseqrecord
    from pydna.dseq import Dseq

    # Expected fragments, given as watson/crick representations.
    a = Dseq.from_representation(
        """\
GTTACTTTACCCGACGT
CAATGAAATGGGCTGCA
"""
    )

    b = Dseq.from_representation(
        """\
CCCaGG
GGGtCC
"""
    )

    containing_sgRNA = Dseqrecord("GTTACTTTACCCGACGTCCCgttttagagctagaaatagcaagttaaaataagg")

    target = Dseqrecord("GTTACTTTACCCGACGTCCCaGG")

    def fragments(substrate, construct):
        # Build one cas9 per protospacer found in the construct and return
        # the sequences of the fragments produced by cutting the substrate.
        guides = [cas9(ps) for ps in protospacer(construct)]
        return [fragment.seq for fragment in substrate.cut(guides)]

    assert fragments(target, containing_sgRNA) == [a, b]
    assert fragments(target, containing_sgRNA.rc()) == [a, b]
    assert fragments(target.rc(), containing_sgRNA) == [b.rc(), a.rc()]
    assert fragments(target.rc(), containing_sgRNA.rc()) == [b.rc(), a.rc()]


# Allow running this test file directly as a script: pytest is invoked on
# this file only, with "-vv" (very verbose) and "-s" (no output capture).
if __name__ == "__main__":
    pytest.main([__file__, "-vv", "-s"])

0 comments on commit 75635b4

Please sign in to comment.