From d8c4278126afdebf465726b27fdd058c872bbb72 Mon Sep 17 00:00:00 2001 From: Ruslan Forostianov Date: Wed, 27 Nov 2024 22:26:20 +0100 Subject: [PATCH] Add command to patch study meta data --- README.md | 8 + .../scripts/PatchCancerStudyMetadata.java | 169 ++++++++++++ .../scripts/TestPatchCancerStudyMetadata.java | 243 ++++++++++++++++++ 3 files changed, 420 insertions(+) create mode 100644 src/main/java/org/mskcc/cbio/portal/scripts/PatchCancerStudyMetadata.java create mode 100644 src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestPatchCancerStudyMetadata.java diff --git a/README.md b/README.md index f4148fd3..50f0b671 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,14 @@ For instance, uploading study metadata, resources, or GSVA data incrementally is This method ensures efficient updates without the need for complete study reuploads, saving time and computational resources. +### Patching Study Metadata + +It is possible to update the study name, description, citation or pmid without reloading the whole study. Below is an example of such a command: + +```bash +java -cp core-*.jar org.mskcc.cbio.portal.scripts.PatchCancerStudyMetadata meta_study.txt +``` + ## How to run integration tests This section guides you through the process of running integration tests by setting up a cBioPortal MySQL database environment using Docker. Please follow these steps carefully to ensure your testing environment is configured correctly. diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/PatchCancerStudyMetadata.java b/src/main/java/org/mskcc/cbio/portal/scripts/PatchCancerStudyMetadata.java new file mode 100644 index 00000000..fba6cc21 --- /dev/null +++ b/src/main/java/org/mskcc/cbio/portal/scripts/PatchCancerStudyMetadata.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2016 The Hyve B.V. + * This code is licensed under the GNU Affero General Public License (AGPL), + * version 3, or (at your option) any later version. + */ + +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package org.mskcc.cbio.portal.scripts; + +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.JdbcUtil; +import org.mskcc.cbio.portal.util.ProgressMonitor; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.*; +import java.util.stream.Collectors; + + +/** + * Command Line Tool to update the metadata of a Single Cancer Study. 
+ * <p>
+ * The meta file is a properties-style file. It must contain the
+ * {@code cancer_study_identifier} of the study to patch plus at least one of the fields
+ * listed in {@link #PATCH_SUPPORTED_META_FIELDS}. Only those fields are written to the
+ * {@code cancer_study} table; every other field is skipped with a warning.
+ */
+public class PatchCancerStudyMetadata extends ConsoleRunnable {
+
+    /** Meta field that selects the study to patch; it is required and is never modified. */
+    public static final String CANCER_STUDY_IDENTIFIER_META_FIELD = "cancer_study_identifier";
+    /** Meta field holding the study name. */
+    public static final String NAME_META_FIELD = "name";
+    /** Meta field holding the study description. */
+    public static final String DESCRIPTION_META_FIELD = "description";
+    /** Meta field holding the study citation. */
+    public static final String CITATION_META_FIELD = "citation";
+    /** Meta field holding the study pmid. */
+    public static final String PMID_META_FIELD = "pmid";
+    /** Meta fields whose values this tool is able to update. */
+    public static final Set<String> PATCH_SUPPORTED_META_FIELDS =
+            Set.of(NAME_META_FIELD, DESCRIPTION_META_FIELD, CITATION_META_FIELD, PMID_META_FIELD);
+
+    /**
+     * Patches the study using the command line arguments this instance was created with.
+     */
+    public void run() {
+        run(args);
+    }
+
+    /**
+     * Patches the study described by the meta file named in the first argument.
+     *
+     * @param args command line arguments; {@code args[0]} is the path of the meta file
+     * @throws UsageException   if no argument is given
+     * @throws RuntimeException wrapping any failure that occurs while patching
+     */
+    public static void run(String[] args) {
+        if (args.length < 1) {
+            throw new UsageException(
+                    PatchCancerStudyMetadata.class.getName(),
+                    null,
+                    "<meta_study.txt>");
+        }
+        File file = new File(args[0]);
+        try {
+            run(file);
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to patch cancer study metadata from file " + file, e);
+        }
+    }
+
+    /**
+     * Patches the study described by the given meta file.
+     *
+     * @param file the meta file to read
+     * @throws IOException   if the file cannot be opened or read
+     * @throws SQLException  if the database update fails
+     * @throws DaoException  declared for callers; see {@link #run(InputStream)}
+     */
+    public static void run(File file) throws IOException, SQLException, DaoException {
+        // try-with-resources: the stream is closed even when patching fails.
+        try (InputStream inputStream = new FileInputStream(file)) {
+            run(inputStream);
+        }
+    }
+
+    /**
+     * Patches the study described by the meta data read from the given stream.
+     * The stream is read but not closed by this method.
+     *
+     * @param inputStream stream with the content of the meta file
+     * @throws IOException           if the stream cannot be read
+     * @throws SQLException          if the database update fails
+     * @throws DaoException          declared for callers
+     * @throws IllegalStateException if the content is empty, has no
+     *                               {@code cancer_study_identifier}, has no supported field
+     *                               to patch, or no study has been updated
+     */
+    public static void run(InputStream inputStream) throws IOException, SQLException, DaoException {
+        TrimmedProperties properties = new TrimmedProperties();
+        properties.load(inputStream);
+        if (properties.isEmpty()) {
+            throw new IllegalStateException("No fields were found");
+        }
+        if (!properties.containsKey(CANCER_STUDY_IDENTIFIER_META_FIELD)) {
+            throw new IllegalStateException("No " + CANCER_STUDY_IDENTIFIER_META_FIELD + " field has been found");
+        }
+        if (properties.keySet().stream().noneMatch(PATCH_SUPPORTED_META_FIELDS::contains)) {
+            throw new IllegalStateException("No field to patch has been found. Supported fields: "
+                    + describeSupportedFields());
+        }
+
+        // Drop (with a warning) everything that is neither the identifier nor a patchable field.
+        Iterator<Map.Entry<Object, Object>> iterator = properties.entrySet().iterator();
+        while (iterator.hasNext()) {
+            Object field = iterator.next().getKey();
+            if (!CANCER_STUDY_IDENTIFIER_META_FIELD.equals(field) && !PATCH_SUPPORTED_META_FIELDS.contains(field)) {
+                ProgressMonitor.logWarning("Patch functionality is not supported for '" + field + "' field. Skipping it.");
+                iterator.remove();
+            }
+        }
+        if (!patchCancerStudy(properties)) {
+            throw new IllegalStateException("No study has been patched");
+        }
+        String message = "Patched cancer study:\n"
+                + properties.keySet().stream()
+                        .sorted()
+                        .map(field -> " --> " + field + ": " + properties.getProperty((String) field))
+                        .collect(Collectors.joining("\n"));
+
+        ProgressMonitor.setCurrentMessage(message);
+        ProgressMonitor.setCurrentMessage("Done");
+    }
+
+    /**
+     * Updates the {@code cancer_study} row selected by the {@code cancer_study_identifier}
+     * entry with the values of all remaining entries.
+     *
+     * @param cancerStudyMetadata - metadata to patch; besides the identifier it may only
+     *                            contain fields from {@link #PATCH_SUPPORTED_META_FIELDS}
+     * @return true - if record has been updated; false - otherwise
+     * @throws SQLException          if the update statement fails
+     * @throws IllegalStateException if the identifier is missing, there is nothing to patch,
+     *                               or an unsupported field is present
+     */
+    public static boolean patchCancerStudy(Properties cancerStudyMetadata) throws SQLException {
+        // Sorted copy: the SET clause and the parameter binding below iterate it in the same order.
+        Set<Object> orderedMetaFields = new TreeSet<>(cancerStudyMetadata.keySet());
+        if (!orderedMetaFields.remove(CANCER_STUDY_IDENTIFIER_META_FIELD)) {
+            throw new IllegalStateException("No " + CANCER_STUDY_IDENTIFIER_META_FIELD + " field has been found");
+        }
+        if (orderedMetaFields.isEmpty()) {
+            throw new IllegalStateException("No field to patch has been found. Supported fields: "
+                    + describeSupportedFields());
+        }
+        // Field names become column names in the statement text, so only whitelisted names may pass.
+        for (Object field : orderedMetaFields) {
+            if (!PATCH_SUPPORTED_META_FIELDS.contains(field)) {
+                throw new IllegalStateException("Patch functionality is not supported for '" + field + "' field.");
+            }
+        }
+        String sql = "UPDATE cancer_study SET "
+                + orderedMetaFields.stream()
+                        .map(field -> "`" + getDbField(field) + "` = ?")
+                        .collect(Collectors.joining(", "))
+                + " WHERE `CANCER_STUDY_IDENTIFIER` = ?";
+        Connection con = null;
+        PreparedStatement pstmt = null;
+        try {
+            con = JdbcUtil.getDbConnection(PatchCancerStudyMetadata.class);
+            pstmt = con.prepareStatement(sql);
+            int parameterIndex = 1;
+            for (Object field : orderedMetaFields) {
+                pstmt.setString(parameterIndex++, cancerStudyMetadata.getProperty((String) field));
+            }
+            pstmt.setString(parameterIndex, cancerStudyMetadata.getProperty(CANCER_STUDY_IDENTIFIER_META_FIELD));
+            return pstmt.executeUpdate() == 1;
+        } finally {
+            JdbcUtil.closeAll(PatchCancerStudyMetadata.class, con, pstmt, null);
+        }
+    }
+
+    /**
+     * Maps a meta data field to its database column by upper-casing the field name.
+     *
+     * @param field - meta data field
+     * @return corresponding database field
+     */
+    private static String getDbField(Object field) {
+        // Locale.ROOT keeps the mapping independent of the JVM default locale.
+        return field.toString().toUpperCase(Locale.ROOT);
+    }
+
+    /**
+     * Lists the patchable meta fields in alphabetical order for use in error messages.
+     *
+     * @return comma separated names of the supported fields
+     */
+    private static String describeSupportedFields() {
+        return PATCH_SUPPORTED_META_FIELDS.stream().sorted().collect(Collectors.joining(", "));
+    }
+
+    /**
+     * Makes an instance to run with the given command line arguments.
+     *
+     * @param args the command line arguments to be used
+     */
+    public PatchCancerStudyMetadata(String[] args) {
+        super(args);
+    }
+
+    /**
+     * Runs the command as a script and exits with an appropriate exit code.
+     *
+     * @param args the arguments given on the command line
+     */
+    public static void main(String[] args) {
+        ConsoleRunnable runner = new PatchCancerStudyMetadata(args);
+        runner.runInConsole();
+    }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestPatchCancerStudyMetadata.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestPatchCancerStudyMetadata.java
new file mode 100644
index 00000000..d0afb80d
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestPatchCancerStudyMetadata.java
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2015 Memorial Sloan-Kettering Cancer Center.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
+ * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
+ * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
+ * obligations to provide maintenance, support, updates, enhancements or
+ * modifications.
In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/
+
+package org.mskcc.cbio.portal.integrationTest.scripts;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.model.CancerStudy;
+import org.mskcc.cbio.portal.scripts.PatchCancerStudyMetadata;
+import org.mskcc.cbio.portal.scripts.UsageException;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests Patching Study Metadata
+ *
+ * @author Ruslan Forostianov
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
+@Rollback
+@Transactional
+public class TestPatchCancerStudyMetadata {
+
+    /** Internal id of the study created in {@link #setUp()}; used to reload it after patching. */
+    private int cancerStudyInternalId;
+
+    @Rule
+    public ExpectedException exception = ExpectedException.none();
+
+    /**
+     * This is executed n times, for each of the n test methods below.
+     * Creates the study "testnew" (name, description and identifier are all "testnew")
+     * with citation "citation", pmid "0000" and group "XYZ".
+     *
+     * @throws DaoException
+     */
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+        CancerStudy cancerStudy = new CancerStudy("testnew", "testnew", "testnew", "brca", true);
+        cancerStudy.setReferenceGenome("hg19");
+        cancerStudy.setCitation("citation");
+        cancerStudy.setPmid("0000");
+        cancerStudy.setGroupsInUpperCase("XYZ");
+        DaoCancerStudy.addCancerStudy(cancerStudy);
+        this.cancerStudyInternalId = cancerStudy.getInternalId();
+    }
+
+    /**
+     * Wraps meta file content into a stream, using an explicit charset so the test does not
+     * depend on the platform default.
+     */
+    private static InputStream metaStream(String content) {
+        return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
+    }
+
+    /** All supported fields are patched when the command is run against a meta file. */
+    @Test
+    public void testPatchCommand() throws DaoException, SQLException, IOException {
+        Path tempFile = Files.createTempFile("tempFile_", ".txt");
+        tempFile.toFile().deleteOnExit();
+        Files.write(tempFile, """
+                cancer_study_identifier: testnew
+                name: testnew name updated
+                description: testnew description updated
+                citation: testnew citation updated
+                pmid: 12345
+                """.getBytes(StandardCharsets.UTF_8));
+
+        new PatchCancerStudyMetadata(new String[] {
+                tempFile.toString()
+        }).run();
+
+        DaoCancerStudy.reCacheAll();
+        CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyInternalId);
+        assertNotNull(cancerStudy);
+        // Remains
+        assertEquals("brca", cancerStudy.getTypeOfCancerId());
+        assertEquals(Set.of("XYZ"), cancerStudy.getGroups());
+        // Patched values
+        assertEquals("testnew name updated", cancerStudy.getName());
+        assertEquals("testnew description updated", cancerStudy.getDescription());
+        assertEquals("testnew citation updated", cancerStudy.getCitation());
+        assertEquals("12345", cancerStudy.getPmid());
+    }
+
+    /** Running the command without arguments reports invalid usage. */
+    @Test
+    public void testUsageException() {
+        exception.expect(UsageException.class);
+        exception.expectMessage("Invalid usage of the org.mskcc.cbio.portal.scripts.PatchCancerStudyMetadata script");
+        new PatchCancerStudyMetadata(new String[] {
+        }).run();
+    }
+
+    /** Fields that are absent from the meta data keep their stored values. */
+    @Test
+    public void testPartialPatch() throws DaoException, SQLException, IOException {
+        InputStream inputStream = metaStream("""
+                cancer_study_identifier: testnew
+                name: testnew name updated
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+
+        DaoCancerStudy.reCacheAll();
+        CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyInternalId);
+        assertNotNull(cancerStudy);
+        // Remains
+        assertEquals("brca", cancerStudy.getTypeOfCancerId());
+        assertEquals(Set.of("XYZ"), cancerStudy.getGroups());
+        assertEquals("testnew", cancerStudy.getDescription());
+        assertEquals("citation", cancerStudy.getCitation());
+        assertEquals("0000", cancerStudy.getPmid());
+        // Patched values
+        assertEquals("testnew name updated", cancerStudy.getName());
+        assertTrue(
+                ProgressMonitor
+                        .getLog()
+                        .contains("--> name: testnew name updated"));
+    }
+
+    /** A field that is present with an empty value is patched to the empty string. */
+    @Test
+    public void testEmptyDescription() throws DaoException, SQLException, IOException {
+        InputStream inputStream = metaStream("""
+                cancer_study_identifier: testnew
+                description:
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+
+        DaoCancerStudy.reCacheAll();
+        CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyInternalId);
+        assertNotNull(cancerStudy);
+        // Remains
+        assertEquals("brca", cancerStudy.getTypeOfCancerId());
+        assertEquals(Set.of("XYZ"), cancerStudy.getGroups());
+        // The name is not part of this patch, so it keeps the value given in setUp().
+        assertEquals("testnew", cancerStudy.getName());
+        assertEquals("citation", cancerStudy.getCitation());
+        assertEquals("0000", cancerStudy.getPmid());
+        // Patched values
+        assertEquals("", cancerStudy.getDescription());
+        assertTrue(
+                ProgressMonitor
+                        .getLog()
+                        .contains("--> description: "));
+    }
+
+    /** Empty meta data is rejected. */
+    @Test
+    public void testEmptyContent() throws SQLException, IOException, DaoException {
+        exception.expect(IllegalStateException.class);
+        exception.expectMessage("No fields were found");
+        InputStream inputStream = metaStream("");
+
+        PatchCancerStudyMetadata.run(inputStream);
+    }
+
+    /** Meta data without the study identifier is rejected. */
+    @Test
+    public void testNoCancerStudyIdentifierField() throws SQLException, IOException, DaoException {
+        exception.expect(IllegalStateException.class);
+        exception.expectMessage("No cancer_study_identifier field has been found");
+        InputStream inputStream = metaStream("""
+                name: name
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+    }
+
+    /** Meta data that only carries the identifier has nothing to patch and is rejected. */
+    @Test
+    public void testNoFieldsToPatch() throws SQLException, IOException, DaoException {
+        exception.expect(IllegalStateException.class);
+        exception.expectMessage("No field to patch has been found");
+        InputStream inputStream = metaStream("""
+                cancer_study_identifier: non_existing_id
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+    }
+
+    /** Patching an identifier that matches no study is reported as a failure. */
+    @Test
+    public void testNoStudyToPatch() throws SQLException, IOException, DaoException {
+        exception.expect(IllegalStateException.class);
+        exception.expectMessage("No study has been patched");
+        InputStream inputStream = metaStream("""
+                cancer_study_identifier: non_existing_id
+                name: name
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+    }
+
+    /** Unsupported fields are skipped with a warning while supported ones are still patched. */
+    @Test
+    public void testNotSupportedPatchFields() throws DaoException, SQLException, IOException {
+        InputStream inputStream = metaStream("""
+                cancer_study_identifier: testnew
+                name: testnew name updated
+                some_field: some_value
+                """);
+
+        PatchCancerStudyMetadata.run(inputStream);
+
+        assertTrue(
+                ProgressMonitor
+                        .getWarnings()
+                        .stream()
+                        .anyMatch(
+                                warn ->
+                                        warn.contains("Patch functionality is not supported for 'some_field' field."
+                                                + " Skipping it.")));
+    }
+}