From d255a2f0dc5bb13524811cd01ca5082a57165580 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Sun, 2 Jun 2024 13:46:03 +0200 Subject: [PATCH] #110 Parse text inside journal-title nodes. --- pubmed_parser/pubmed_oa_parser.py | 2 +- tests/test_pubmed_oa_parser.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pubmed_parser/pubmed_oa_parser.py b/pubmed_parser/pubmed_oa_parser.py index d769d0d..97b47eb 100644 --- a/pubmed_parser/pubmed_oa_parser.py +++ b/pubmed_parser/pubmed_oa_parser.py @@ -156,7 +156,7 @@ def parse_pubmed_xml(path, include_path=False, nxml=False): journal_node = tree.findall(".//journal-title") if journal_node is not None: - journal = " ".join([j.text for j in journal_node]) + journal = " ".join(["".join(node.itertext()) for node in journal_node]) else: journal = "" diff --git a/tests/test_pubmed_oa_parser.py b/tests/test_pubmed_oa_parser.py index 4bff96e..bd17823 100644 --- a/tests/test_pubmed_oa_parser.py +++ b/tests/test_pubmed_oa_parser.py @@ -39,6 +39,7 @@ def test_parse_pubmed_xml(): assert parsed_xml.get("doi") == "10.1371/journal.pone.0046493" assert parsed_xml.get("subjects") == "Research Article; Biology; Biochemistry; Enzymes; Enzyme Metabolism; Lipids; Fatty Acids; Glycerides; Lipid Metabolism; Neutral Lipids; Metabolism; Lipid Metabolism; Proteins; Globular Proteins; Protein Classes; Recombinant Proteins; Biotechnology; Microbiology; Bacterial Pathogens; Bacteriology; Emerging Infectious Diseases; Host-Pathogen Interaction; Microbial Growth and Development; Microbial Metabolism; Microbial Pathogens; Microbial Physiology; Proteomics; Sequence Analysis; Spectrometric Identification of Proteins" # noqa assert "Competing Interests: " in parsed_xml.get("coi_statement") + assert parsed_xml.get("journal") == "PLoS ONE" def test_parse_pubmed_paragraph():