Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ac updates #119

Merged
merged 24 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9094860
have removed cgat/SequencePairProperties.py to simplify dependencies
Acribbs Apr 1, 2024
e8e6dd3
removed cgat2rdf to make repo easier to maintain
Acribbs Apr 1, 2024
0077438
have updated numpy and pandas to specify correct float and int to sat…
Acribbs Apr 1, 2024
1ef56ed
have removed WrapperCodeML because I have no idea what it does so tha…
Acribbs Apr 1, 2024
489c5fa
have removed WrapperCodeML documentation too
Acribbs Apr 1, 2024
dabdc70
added import tests to github actions
Acribbs Apr 1, 2024
bb5893e
updated tests for unstrip, md5sum is different but output looks the s…
Acribbs Apr 1, 2024
127c685
fix github actions yml
Acribbs Apr 1, 2024
4f646b3
updated test for unstrip as updated old one
Acribbs Apr 1, 2024
83a2198
added new test for bam2bam set-nh as output is different md5sum but c…
Acribbs Apr 1, 2024
0510079
updated downsample paired, looks like same output but likely because …
Acribbs Apr 1, 2024
942937c
updated downsample single tests, likely related to .int and .float ch…
Acribbs Apr 1, 2024
493d032
updated bamfiltered because of .int and .float changes
Acribbs Apr 1, 2024
78cd71c
updated set sequence because of int and float changes in numpy
Acribbs Apr 1, 2024
e98871e
updated strip_match to reflect changes in the float and int
Acribbs Apr 1, 2024
999ae34
updated strip_sequence
Acribbs Apr 1, 2024
2a7a9cd
updated strip quality
Acribbs Apr 1, 2024
cc91d91
updated unset mapq
Acribbs Apr 1, 2024
3d4ba0d
updated unset mapq
Acribbs Apr 1, 2024
e5f14a4
import of cnested list is not working during github testing, but is l…
Acribbs Apr 1, 2024
5102e98
modified setup from setup.py to pip
Acribbs Apr 1, 2024
4b169bb
update geneprofile as np floats wrong
Acribbs Apr 1, 2024
2b3bd72
update dnumpy floats
Acribbs Apr 1, 2024
4b376fe
have removed csv2db as its in cgatcore and imports this as a module a…
Acribbs Apr 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
328 changes: 328 additions & 0 deletions #setup.py#
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
from setuptools import setup, find_packages, Extension
import sysconfig
import sys
import os
import subprocess
import re

########################################################################
#######################################################################
# Check for dependencies
#
# Is there a way to do this more elegantly?
# 1. Run "pip install numpy"
# 2. Wrap inside functions (works for numpy/pysam, but not cython)
try:
import numpy
except ImportError:
raise ImportError(
"the CGAT code collection requires numpy to be installed "
"before running setup.py (pip install numpy)")

try:
import Cython
except ImportError:
raise ImportError(
"the CGAT code collection requires cython to "
"be installed before running setup.py (pip install cython)")

try:
import pysam
except ImportError:
raise ImportError(
"the CGAT code collection requires pysam to "
"be installed before running setup.py (pip install pysam)")

########################################################################
########################################################################
# Import setuptools
# Use existing setuptools, otherwise try ez_setup.
try:
import setuptools
except ImportError:
# try to get via ez_setup
# ez_setup did not work on all machines tested as
# it uses curl with https protocol, which is not
# enabled in ScientificLinux
import ez_setup
ez_setup.use_setuptools()

from setuptools import setup, find_packages, Extension

from distutils.version import LooseVersion
if LooseVersion(setuptools.__version__) < LooseVersion('1.1'):
print("Version detected:", LooseVersion(setuptools.__version__))
raise ImportError(
"the CGAT code collection requires setuptools 1.1 higher")

from Cython.Distutils import build_ext

########################################################################
########################################################################
IS_OSX = sys.platform == 'darwin'

########################################################################
########################################################################
# collect CGAT version
sys.path.insert(0, "cgat")
import version

version = version.__version__

###############################################################
###############################################################
# Check for external dependencies
#
# Not exhaustive, simply execute a representative tool from a toolkit.
# Each entry is (command to run, toolkit name used in warnings, exit code the
# command is expected to return when invoked without arguments).
external_dependencies = (
    ("wigToBigWig", "UCSC tools", 255),
    ("bedtools", "bedtools", 0),
)

# Resolve the null sink once; it does not depend on the tool being probed.
try:
    # py3k
    from subprocess import DEVNULL
except ImportError:
    # Python 2 has no subprocess.DEVNULL, fall back to the null device.
    DEVNULL = open(os.devnull, 'wb')

for tool, toolkit, expected in external_dependencies:
    try:
        retcode = subprocess.call(tool, shell=True,
                                  stdout=DEVNULL, stderr=DEVNULL)
    except OSError as msg:
        print(("WARNING: depency check for %s failed: %s" % (toolkit, msg)))
        # No exit code was produced for this tool. Skip the comparison
        # below instead of reading an unbound (or stale) ``retcode``.
        continue

    # UCSC tools return 255 when called without arguments
    if retcode != expected:
        print(("WARNING: depency check for %s(%s) failed, error %i" %
               (toolkit, tool, retcode)))

###############################################################
###############################################################
# Define dependencies
#
major, minor1, minor2, s, tmp = sys.version_info

if (major == 2 and minor1 < 7) or major < 2:
raise SystemExit("""CGAT requires Python 2.7 or later.""")


#####################################################################
#####################################################################
# Code to install dependencies from a repository
#####################################################################
# Modified from http://stackoverflow.com/a/9125399
#####################################################################
def which(program):
    """
    Detect whether or not a program is installed.
    Thanks to http://stackoverflow.com/a/377028/70191

    If ``program`` contains a directory component it is checked as given;
    otherwise every directory listed in the ``PATH`` environment variable
    is searched in order.

    Returns the path of the first executable regular file found, or
    ``None`` if there is none (including when ``PATH`` is not set).
    """
    def is_exe(fpath):
        # isfile() rather than exists(): directories normally pass the
        # X_OK access check too and must not be reported as programs.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, _ = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        # PATH may legitimately be absent from the environment; treat
        # that as "no directories to search" instead of raising KeyError.
        search_path = os.environ.get('PATH')
        directories = [] if search_path is None else \
            search_path.split(os.pathsep)
        for path in directories:
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None

# "-e <vcs>+...#egg=<package>-<version>" style editable VCS requirements.
REPO_REQUIREMENT = re.compile(
    r'^-e (?P<link>(?P<vcs>git|svn|hg|bzr).+#egg=(?P<package>.+)-(?P<version>\d(?:\.\d)*))$')
# "-e <link>#<package>-<version>" style editable requirements.
HTTPS_REQUIREMENT = re.compile(
    r'^-e (?P<link>.*).+#(?P<package>.+)-(?P<version>\d(?:\.\d)*)$')
install_requires = []
dependency_links = []

# Read the whole file up front so the handle is closed deterministically.
with open('requires.txt') as requirements_file:
    requirement_lines = [line.strip() for line in requirements_file]

for requirement in requirement_lines:
    # Skip blank lines and comments. The check runs on the stripped line so
    # that indented comments are not mistaken for requirements.
    if not requirement or requirement.startswith("#"):
        continue

    match = REPO_REQUIREMENT.match(requirement)
    if match:
        # The VCS client has to be on PATH to fetch the linked repository.
        assert which(match.group('vcs')) is not None, \
            ("VCS '%(vcs)s' must be installed in order to "
             "install %(link)s" % match.groupdict())
        install_requires.append("%(package)s==%(version)s" % match.groupdict())
        dependency_links.append(match.group('link'))
        continue

    # Plain https URLs are passed through unchanged.
    if requirement.startswith("https"):
        install_requires.append(requirement)
        continue

    match = HTTPS_REQUIREMENT.match(requirement)
    if match:
        install_requires.append("%(package)s>=%(version)s" % match.groupdict())
        dependency_links.append(match.group('link'))
        continue

    # Anything else is taken as an ordinary requirement specifier.
    install_requires.append(requirement)

if major == 2:
install_requires.extend(['web.py>=0.37',
'xlwt>=0.7.4',
'matplotlib-venn>=0.5'])
elif major == 3:
pass

cgat_packages = find_packages()
cgat_package_dirs = {'cgat': 'cgat'}

##########################################################
##########################################################
# classifiers
classifiers = """
Development Status :: 3 - Alpha
Intended Audience :: Science/Research
Intended Audience :: Developers
License :: OSI Approved
Programming Language :: Python
Topic :: Software Development
Topic :: Scientific/Engineering
Operating System :: POSIX
Operating System :: Unix
Operating System :: MacOS
"""

##########################################################
# Cython Extensions
# Parent directories of the interpreter's "include" and "stdlib" paths, as
# reported by sysconfig.
conda_includes = [os.path.dirname(sysconfig.get_paths()["include"])]
conda_libdirs = [os.path.dirname(sysconfig.get_paths()["stdlib"])]

# Shared libraries shipped with pysam, and the directories that hold them.
pysam_libraries = pysam.get_libraries()
pysam_libdirs = list(
    {os.path.dirname(library) for library in pysam_libraries}
) + conda_libdirs

# Library names for the linker: drop the first three characters ("lib") and
# the last three (".so") of each file name, and link against htslib first.
pysam_libs = ["hts"] + [
    os.path.basename(library)[3:-3] for library in pysam_libraries
]

pysam_dirname = os.path.dirname(pysam.__file__)
# Linking against bundles does not work (and apparently is not needed)
# within OS X, so no extra link arguments are built there.
extra_link_args = [] if IS_OSX else [
    os.path.join(pysam_dirname, library) for library in pysam.get_libraries()
]

# One rpath flag per library directory, followed by the conda directories.
extra_link_args_pysam = [
    '-Wl,-rpath,{}'.format(directory)
    for directory in pysam_libdirs + conda_libdirs
]

def _pysam_include_dirs():
    """Return a fresh include path list: conda, pysam and numpy headers."""
    return conda_includes + pysam.get_include() + [numpy.get_include()]


def _pysam_linked_extension(module_name, pyx_source):
    """Declare a single-source C extension that links against pysam."""
    return Extension(
        module_name,
        [pyx_source],
        include_dirs=_pysam_include_dirs(),
        library_dirs=pysam_libdirs,
        libraries=pysam_libs,
        define_macros=pysam.get_defines(),
        language="c",
        extra_link_args=extra_link_args_pysam,
    )


# Extensions that do not link against the pysam libraries.
extensions = [
    Extension(
        'cgat.Components',
        ['cgat/Components/Components.pyx',
         'cgat/Components/connected_components.cpp', ],
        library_dirs=[],
        libraries=[],
        language="c++",
    ),
    Extension(
        "cgat.NCL.cnestedlist",
        ["cgat/NCL/cnestedlist.pyx",
         "cgat/NCL/intervaldb.c"],
        library_dirs=[],
        libraries=[],
        language="c",
    ),
    Extension(
        "cgat.GeneModelAnalysis",
        ["cgat/GeneModelAnalysis.pyx"],
        include_dirs=_pysam_include_dirs(),
        library_dirs=[],
        libraries=[],
        define_macros=pysam.get_defines(),
        language="c",
    ),
]

# Extensions that share the same pysam link settings; the order is kept as
# in the original declaration list.
extensions.extend(
    _pysam_linked_extension(module_name, pyx_source)
    for module_name, pyx_source in (
        ("cgat.BamTools.bamtools", "cgat/BamTools/bamtools.pyx"),
        ("cgat.BamTools.geneprofile", "cgat/BamTools/geneprofile.pyx"),
        ("cgat.BamTools.peakshape", "cgat/BamTools/peakshape.pyx"),
        ("cgat.VCFTools", "cgat/VCFTools/vcftools.pyx"),
        ("cgat.FastqTools", "cgat/FastqTools/fastqtools.pyx"),
    )
)

# All modules are compiled as Python 3 sources.
for e in extensions:
    e.cython_directives = {'language_level': "3str"}

# Register the cgat distribution with setuptools: metadata, package layout,
# console entry point, requirements and the Cython extension modules.
setup(
# package information
name='cgat',
# ``version`` was rebound earlier in this script to version.__version__
version=version,
description='cgat : the Computational Genomics Analysis Toolkit',
author='Andreas Heger',
author_email='[email protected]',
license="MIT",
platforms=["any"],
keywords="computational genomics",
long_description='cgat : the Computational Genomics Analysis Toolkit',
# one classifier per non-empty line of the ``classifiers`` string
classifiers=[_f for _f in classifiers.split("\n") if _f],
url="http://www.cgat.org/cgat/Tools/",
# package contents
packages=cgat_packages,
package_dir=cgat_package_dirs,
include_package_data=True,
# installs a ``cgat`` command that calls main() in cgat/cgat.py
entry_points={
'console_scripts': ['cgat = cgat.cgat:main']
},
# dependencies
# both lists are filled while parsing requires.txt earlier in this script
install_requires=install_requires,
# NOTE(review): dependency_links is believed to be ignored by current pip
# releases - confirm against the setuptools/pip documentation
dependency_links=dependency_links,
# extension modules
ext_modules=extensions,
# build_ext is the Cython.Distutils command imported earlier in this script
cmdclass={'build_ext': build_ext},
# other options
zip_safe=False,
test_suite="tests",
)
2 changes: 1 addition & 1 deletion .github/workflows/cgatapps_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
conda list
- name: Test
run: |
python setup.py install
pip install -e .
pip install nose
nosetests -v tests/test_style.py
nosetests -v tests/test_scripts.py
16 changes: 8 additions & 8 deletions cgat/BamTools/bamtools.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -666,12 +666,12 @@ def bam2stats_count(AlignmentFile samfile,
counter.total_read2_is_unmapped = total_read2_is_unmapped
counter.total_read2_is_missing = total_read2_is_missing

substitution_rates = numpy.zeros(fastq_nreads, dtype=numpy.float)
insertion_rates = numpy.zeros(fastq_nreads, dtype=numpy.float)
deletion_rates = numpy.zeros(fastq_nreads, dtype=numpy.float)
error_rates = numpy.zeros(fastq_nreads, dtype=numpy.float)
coverages = numpy.zeros(fastq_nreads, dtype=numpy.float)
mask = numpy.ones(fastq_nreads, dtype=numpy.int)
substitution_rates = numpy.zeros(fastq_nreads, dtype=numpy.float64)
insertion_rates = numpy.zeros(fastq_nreads, dtype=numpy.float64)
deletion_rates = numpy.zeros(fastq_nreads, dtype=numpy.float64)
error_rates = numpy.zeros(fastq_nreads, dtype=numpy.float64)
coverages = numpy.zeros(fastq_nreads, dtype=numpy.float64)
mask = numpy.ones(fastq_nreads, dtype=numpy.int64)

if outfile_details:
header = ["read_md5",
Expand Down Expand Up @@ -1558,7 +1558,7 @@ def bam2stats_window_count(AlignmentFile samfile,
cdef uint32_t ncontigs = len(contigs)
cdef int ncolumns = len(columns)
cdef numpy.ndarray offsets = numpy.zeros(
ncontigs + 1, dtype=numpy.int32)
ncontigs + 1, dtype=numpy.int64)

for idx, length in enumerate(lengths):
offsets[idx] = offset
Expand All @@ -1570,7 +1570,7 @@ def bam2stats_window_count(AlignmentFile samfile,

cdef numpy.ndarray window_counts = numpy.zeros(
(total_windows, ncolumns),
dtype=numpy.int32)
dtype=numpy.int64)

cdef int32_t [:, :] window_counts_view = window_counts

Expand Down
Loading
Loading