diff --git a/scripts/ebook/3.py b/scripts/ebook/3.py index 9acb85a69..b18af3a7e 100755 --- a/scripts/ebook/3.py +++ b/scripts/ebook/3.py @@ -3,6 +3,7 @@ """ Modify flattened .tex file. """ + import datetime as dt import os import re @@ -70,15 +71,46 @@ cont, ) +# OMakeIV sections +# not used in DE version + # \censor +cont = re.sub(r"\\censor\{.*?\}", r"xxxxxx", cont) + + +# # remove Deathly_Hallows_Sign.pdf and other pdf images +# # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf} +# cont = re.sub( +# # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}", +# r"\\includegraphics.*?\.pdf\}", +# "", +# cont, +# ) + +# remove all images cont = re.sub( - r"\\censor\{.*?\}", - r"xxxxxx", + r"\\includegraphics\[.*?\]\{.*?\}", + "", cont, + flags=re.DOTALL, ) -# for spellcheck doc version -> not working, make_ebook-sh runs forever... -# cont = re.sub(r"\\spell\{.*?\}+", "spell", cont) +# remove empty envs +cont = re.sub( + r"\\begin\{([^\}]*)\}\s*\\end\{\1}", + "", + cont, + flags=re.DOTALL, +) + +# remove end stuff +cont = re.sub( + r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}", + r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}", + cont, + flags=re.DOTALL, + count=1, +) with open(target_file, mode="w", encoding="utf-8", newline="\n") as fhOut: fhOut.write(cont) diff --git a/scripts/ebook/6.py b/scripts/ebook/6.py index 3f7beb4fb..2ee6fea9d 100755 --- a/scripts/ebook/6.py +++ b/scripts/ebook/6.py @@ -3,6 +3,7 @@ """ HTML modifications. """ + import os import re import sys @@ -18,22 +19,13 @@ with open(source_file, encoding="utf-8", newline="\n") as fhIn: cont = fhIn.read() -# done via pandoc paramter -V lang=de in 5.sh -# # set html lang to de -# cont = re.sub( -# r'( html conversion cont = re.sub( r"().*?(

Fanfiction von)", r"\1\n\2", cont, flags=re.DOTALL | re.IGNORECASE, + count=1, ) # remove duplication of author name @@ -45,6 +37,28 @@ count=1, ) +# now done via pandoc -V lang=de in 5.sh +# # set language +# cont = re.sub( +# r'(]*) lang="" xml:lang=""', +# r'\1 lang="de" xml:lang="de"', +# cont, +# count=1, +# ) + +# remove trailing slashes to satisfy https://validator.w3.org +cont = cont.replace("
", "
") +cont = cont.replace("


", "
") + +cont = re.sub( + r"(]*) />", + r"\1>", + cont, +) + +# remove bad span ids (containing spaces) from newspaper spans +cont = re.sub(r'', r"", cont, count=5) + # doc structure (not needed any more, using calibi --level1-toc flag instead) # sed -i 's/