From 795a11073c88ebd5598cedb211892bdd13221adf Mon Sep 17 00:00:00 2001
From: Robert Hubley
Date: Tue, 10 Sep 2024 12:29:54 -0700
Subject: [PATCH] Created a new FASTA distribution of the curated portion of
 Dfam and placed it in:
 https://www.dfam.org/releases/current/families/Dfam-RepeatMasker.lib.gz This
 can be used as a baseline library for RepeatClassifier in TETools. I added
 this as a download in getsrc and added Dockerfile commands to unzip it and
 run makeblastdb on it.

---
Review notes (free text placed before the diffstat; not part of the commit
message or of the diff):

 * What this patch does: bumps the RepeatMasker tarball unpacked by the
   Dockerfile from 4.1.6 to 4.1.7, stops installing dfam38_full.0.h5 into
   Libraries/famdb in the active RUN step (that variant is kept as a
   commented-out block), unpacks Dfam-RepeatMasker.lib.gz into
   Libraries/RepeatMasker.lib and runs makeblastdb on it, and adds the
   Dfam-RepeatMasker.lib.gz download to getsrc.sh.
 * Fixed here: the added gunzip/makeblastdb steps run after "cd RepeatMasker"
   and before "cd ..", in a chain that started with "cd /opt".  The relative
   paths "src/..." and "RepeatMasker/Libraries/..." were therefore resolved
   against /opt/RepeatMasker instead of /opt.  They are now absolute paths
   under /opt, both in the active step and in the commented-out alternative.
 * NOTE(review): getsrc.sh still downloads RepeatMasker-4.1.6.tar.gz while
   the Dockerfile now unpacks and removes src/RepeatMasker-4.1.7.tar.gz.
   Confirm the 4.1.7 tarball is fetched by another change.
 * NOTE(review): the leading whitespace of the Dockerfile lines below was
   reconstructed (4 and 8 spaces).  If context fails to match, apply with
   whitespace differences ignored and verify against the target file.

 Dockerfile | 30 ++++++++++++++++++++++++------
 getsrc.sh  |  1 +
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f266f47..49fe523 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -123,12 +123,12 @@ RUN cd /opt \
     && make
 
 # Configure RepeatMasker
+# With Minimal TE Library
+# - Also with full Dfam curated RepeatMasker.lib for RepeatClassifier
 RUN cd /opt \
-    && tar -x -f src/RepeatMasker-4.1.6.tar.gz \
+    && tar -x -f src/RepeatMasker-4.1.7.tar.gz \
     && chmod a+w RepeatMasker/Libraries \
     && chmod a+w RepeatMasker/Libraries/famdb \
-    && gunzip src/dfam38_full.0.h5.gz \
-    && mv src/dfam38_full.0.h5 /opt/RepeatMasker/Libraries/famdb/dfam38_full.0.h5 \
     && cd RepeatMasker \
     && perl configure \
         -hmmer_dir=/opt/hmmer/bin \
@@ -136,10 +136,28 @@ RUN cd /opt \
         -libdir=/opt/RepeatMasker/Libraries \
         -trf_prgm=/opt/trf \
         -default_search_engine=rmblast \
-    && cd .. && rm src/RepeatMasker-4.1.6.tar.gz
+    && gunzip -c /opt/src/Dfam-RepeatMasker.lib.gz > /opt/RepeatMasker/Libraries/RepeatMasker.lib \
+    && /opt/rmblast/bin/makeblastdb -dbtype nucl -in /opt/RepeatMasker/Libraries/RepeatMasker.lib \
+    && cd .. && rm src/RepeatMasker-4.1.7.tar.gz
+
+# With Dfam root partition
+#RUN cd /opt \
+#    && tar -x -f src/RepeatMasker-4.1.6.tar.gz \
+#    && chmod a+w RepeatMasker/Libraries \
+#    && chmod a+w RepeatMasker/Libraries/famdb \
+#    && gunzip src/dfam38_full.0.h5.gz \
+#    && mv src/dfam38_full.0.h5 /opt/RepeatMasker/Libraries/famdb/dfam38_full.0.h5 \
+#    && cd RepeatMasker \
+#    && perl configure \
+#        -hmmer_dir=/opt/hmmer/bin \
+#        -rmblast_dir=/opt/rmblast/bin \
+#        -libdir=/opt/RepeatMasker/Libraries \
+#        -trf_prgm=/opt/trf \
+#        -default_search_engine=rmblast \
+#    && gunzip -c /opt/src/Dfam-RepeatMasker.lib.gz > /opt/RepeatMasker/Libraries/RepeatMasker.lib \
+#    && /opt/rmblast/bin/makeblastdb -dbtype nucl -in /opt/RepeatMasker/Libraries/RepeatMasker.lib \
+#    && cd .. && rm src/RepeatMasker-4.1.6.tar.gz
 
-## Get the RepeatMasker.lib from a full installation
-## --and run makeblastdb
 
 # Include config update
 COPY tetoolsDfamUpdate.pl /opt/RepeatMasker/tetoolsDfamUpdate.pl
diff --git a/getsrc.sh b/getsrc.sh
index edc3212..31555bc 100755
--- a/getsrc.sh
+++ b/getsrc.sh
@@ -36,6 +36,7 @@ download https://github.com/TravisWheelerLab/NINJA/archive/0.99-cluster_only.tar
 # download https://www.repeatmasker.org/coseg-0.2.3.tar.gz
 download https://github.com/rmhubley/coseg/archive/refs/tags/coseg-0.2.3.tar.gz
 download https://www.dfam.org/releases/Dfam_3.8/families/FamDB/dfam38_full.0.h5.gz
+download https://www.dfam.org/releases/Dfam_3.8/families/Dfam-RepeatMasker.lib.gz
 download http://www.repeatmasker.org/RepeatMasker/RepeatMasker-4.1.6.tar.gz
 download https://github.com/Dfam-consortium/RepeatModeler/archive/2.0.5.tar.gz RepeatModeler-2.0.5.tar.gz
 # download https://github.com/zhangrengang/TEsorter/archive/v1.4.6.tar.gz TEsorter-1.4.6.tar.gz